From cb6bbd7a47b36a8ebe8555fd635c3ec12224b283 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Fri, 27 Feb 2026 14:07:38 -0800 Subject: [PATCH 01/13] Add per-tenant Grafana Explore URL format for alert GeneratorURL Add support for tenants to configure alert GeneratorURL to use Grafana Explore format instead of the default Prometheus /graph format. This is controlled by three new per-tenant settings: ruler_alert_generator_url_format, ruler_grafana_datasource_uid, and ruler_grafana_org_id. Co-Authored-By: Claude Opus 4.6 Signed-off-by: Charlie Le --- docs/configuration/config-file-reference.md | 16 +++ pkg/ruler/compat.go | 21 +++- pkg/ruler/external_url.go | 56 +++++++++++ pkg/ruler/external_url_test.go | 67 +++++++++++++ pkg/ruler/manager.go | 13 ++- pkg/ruler/ruler.go | 34 ++++++- pkg/ruler/ruler_test.go | 103 +++++++++++++++++--- pkg/util/validation/exporter_test.go | 1 + pkg/util/validation/limits.go | 22 ++++- schemas/cortex-config-schema.json | 17 ++++ 10 files changed, 326 insertions(+), 24 deletions(-) create mode 100644 pkg/ruler/external_url.go create mode 100644 pkg/ruler/external_url_test.go diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 81b85fb018f..968ca55bb04 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -4329,6 +4329,22 @@ query_rejection: # external labels for alerting rules [ruler_external_labels: | default = []] +# Per-tenant external URL for the ruler. If set, it overrides the global +# -ruler.external.url for this tenant's alert notifications. +[ruler_external_url: | default = ""] + +# Format for alert generator URLs. Supported values: prometheus (default), +# grafana-explore. +[ruler_alert_generator_url_format: | default = ""] + +# Grafana datasource UID for alert generator URLs when format is +# grafana-explore. 
+[ruler_grafana_datasource_uid: | default = ""] + +# Grafana organization ID for alert generator URLs when format is +# grafana-explore. +[ruler_grafana_org_id: | default = 1] + # Enable to allow rules to be evaluated with data from a single zone, if other # zones are not available. [rules_partial_data: | default = false] diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 0dc5c0210eb..2e471ef7f0f 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -19,6 +19,7 @@ import ( "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/strutil" "github.com/weaveworks/common/httpgrpc" "github.com/weaveworks/common/user" @@ -164,6 +165,10 @@ type RulesLimits interface { RulerQueryOffset(userID string) time.Duration DisabledRuleGroups(userID string) validation.DisabledRuleGroups RulerExternalLabels(userID string) labels.Labels + RulerExternalURL(userID string) string + RulerAlertGeneratorURLFormat(userID string) string + RulerGrafanaDatasourceUID(userID string) string + RulerGrafanaOrgID(userID string) int64 } type QueryExecutor func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) @@ -373,7 +378,21 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi QueryFunc: queryFunc, Context: prometheusContext, ExternalURL: cfg.ExternalURL.URL, - NotifyFunc: SendAlerts(notifier, cfg.ExternalURL.URL.String()), + NotifyFunc: SendAlerts(notifier, func(expr string) string { + externalURL := cfg.ExternalURL.String() + if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { + externalURL = tenantURL + } + if overrides.RulerAlertGeneratorURLFormat(userID) == "grafana-explore" { + datasourceUID := overrides.RulerGrafanaDatasourceUID(userID) + orgID := overrides.RulerGrafanaOrgID(userID) + if orgID == 0 { + orgID = 1 + } + return grafanaExploreLink(externalURL, expr, datasourceUID, orgID) + } + return 
externalURL + strutil.TableLinkForExpression(expr) + }), Logger: util_log.GoKitLogToSlog(log.With(logger, "user", userID)), Registerer: reg, OutageTolerance: cfg.OutageTolerance, diff --git a/pkg/ruler/external_url.go b/pkg/ruler/external_url.go new file mode 100644 index 00000000000..0928413a889 --- /dev/null +++ b/pkg/ruler/external_url.go @@ -0,0 +1,56 @@ +package ruler + +import ( + "sync" +) + +// userExternalURL tracks per-user resolved external URLs and detects changes. +type userExternalURL struct { + global string + limits RulesLimits + + mtx sync.Mutex + users map[string]string +} + +func newUserExternalURL(global string, limits RulesLimits) *userExternalURL { + return &userExternalURL{ + global: global, + limits: limits, + + mtx: sync.Mutex{}, + users: map[string]string{}, + } +} + +func (e *userExternalURL) update(userID string) (string, bool) { + tenantURL := e.limits.RulerExternalURL(userID) + resolved := e.global + if tenantURL != "" { + resolved = tenantURL + } + + e.mtx.Lock() + defer e.mtx.Unlock() + + if prev, ok := e.users[userID]; ok && prev == resolved { + return resolved, false + } + + e.users[userID] = resolved + return resolved, true +} + +func (e *userExternalURL) remove(user string) { + e.mtx.Lock() + defer e.mtx.Unlock() + delete(e.users, user) +} + +func (e *userExternalURL) cleanup() { + e.mtx.Lock() + defer e.mtx.Unlock() + for user := range e.users { + delete(e.users, user) + } +} diff --git a/pkg/ruler/external_url_test.go b/pkg/ruler/external_url_test.go new file mode 100644 index 00000000000..50b88563e8e --- /dev/null +++ b/pkg/ruler/external_url_test.go @@ -0,0 +1,67 @@ +package ruler + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUserExternalURL(t *testing.T) { + limits := ruleLimits{} + e := newUserExternalURL("http://global:9090", &limits) + + const userID = "test-user" + + t.Run("global URL used when no per-tenant override", func(t *testing.T) { + e.remove(userID) + url, changed := 
e.update(userID) + require.True(t, changed) + require.Equal(t, "http://global:9090", url) + }) + + t.Run("no change on second update", func(t *testing.T) { + url, changed := e.update(userID) + require.False(t, changed) + require.Equal(t, "http://global:9090", url) + }) + + t.Run("per-tenant URL overrides global", func(t *testing.T) { + limits.mtx.Lock() + limits.externalURL = "http://tenant:3000" + limits.mtx.Unlock() + + url, changed := e.update(userID) + require.True(t, changed) + require.Equal(t, "http://tenant:3000", url) + }) + + t.Run("no change when per-tenant URL is the same", func(t *testing.T) { + url, changed := e.update(userID) + require.False(t, changed) + require.Equal(t, "http://tenant:3000", url) + }) + + t.Run("revert to global when per-tenant override removed", func(t *testing.T) { + limits.mtx.Lock() + limits.externalURL = "" + limits.mtx.Unlock() + + url, changed := e.update(userID) + require.True(t, changed) + require.Equal(t, "http://global:9090", url) + }) + + t.Run("remove and cleanup lifecycle", func(t *testing.T) { + e.remove(userID) + // After remove, next update should report changed + url, changed := e.update(userID) + require.True(t, changed) + require.Equal(t, "http://global:9090", url) + + e.cleanup() + // After cleanup, next update should report changed + url, changed = e.update(userID) + require.True(t, changed) + require.Equal(t, "http://global:9090", url) + }) +} diff --git a/pkg/ruler/manager.go b/pkg/ruler/manager.go index d44a0d95829..86611201899 100644 --- a/pkg/ruler/manager.go +++ b/pkg/ruler/manager.go @@ -53,6 +53,9 @@ type DefaultMultiTenantManager struct { // Per-user externalLabels. userExternalLabels *userExternalLabels + // Per-user externalURL. 
+ userExternalURL *userExternalURL + // rules backup rulesBackupManager *rulesBackupManager @@ -101,6 +104,7 @@ func NewDefaultMultiTenantManager(cfg Config, limits RulesLimits, managerFactory ruleEvalMetrics: evalMetrics, notifiers: map[string]*rulerNotifier{}, userExternalLabels: newUserExternalLabels(cfg.ExternalLabels, limits), + userExternalURL: newUserExternalURL(cfg.ExternalURL.String(), limits), notifiersDiscoveryMetrics: notifiersDiscoveryMetrics, mapper: newMapper(cfg.RulePath, logger), userManagers: map[string]RulesManager{}, @@ -166,6 +170,7 @@ func (r *DefaultMultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGrou r.removeNotifier(userID) r.mapper.cleanupUser(userID) r.userExternalLabels.remove(userID) + r.userExternalURL.remove(userID) r.lastReloadSuccessful.DeleteLabelValues(userID) r.lastReloadSuccessfulTimestamp.DeleteLabelValues(userID) r.configUpdatesTotal.DeleteLabelValues(userID) @@ -210,6 +215,7 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user return } externalLabels, externalLabelsUpdated := r.userExternalLabels.update(user) + externalURL, externalURLUpdated := r.userExternalURL.update(user) existing := true manager := r.getRulesManager(user, ctx) @@ -222,13 +228,13 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user return } - if !existing || rulesUpdated || externalLabelsUpdated { + if !existing || rulesUpdated || externalLabelsUpdated || externalURLUpdated { level.Debug(r.logger).Log("msg", "updating rules", "user", user) r.configUpdatesTotal.WithLabelValues(user).Inc() - if (rulesUpdated || externalLabelsUpdated) && existing { + if (rulesUpdated || externalLabelsUpdated || externalURLUpdated) && existing { r.updateRuleCache(user, manager.RuleGroups()) } - err = manager.Update(r.cfg.EvaluationInterval, files, externalLabels, r.cfg.ExternalURL.String(), r.ruleGroupIterationFunc) + err = manager.Update(r.cfg.EvaluationInterval, files, externalLabels, externalURL, 
r.ruleGroupIterationFunc) r.deleteRuleCache(user) if err != nil { r.lastReloadSuccessful.WithLabelValues(user).Set(0) @@ -443,6 +449,7 @@ func (r *DefaultMultiTenantManager) Stop() { // cleanup user rules directories r.mapper.cleanup() r.userExternalLabels.cleanup() + r.userExternalURL.cleanup() } func (m *DefaultMultiTenantManager) ValidateRuleGroup(g rulefmt.RuleGroup) []error { diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 97da8166239..b6db6ec886a 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -2,6 +2,7 @@ package ruler import ( "context" + "encoding/json" "flag" "fmt" "hash/fnv" @@ -26,7 +27,6 @@ import ( "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/promql/parser" promRules "github.com/prometheus/prometheus/rules" - "github.com/prometheus/prometheus/util/strutil" "github.com/weaveworks/common/user" "golang.org/x/sync/errgroup" @@ -506,7 +506,7 @@ type sender interface { // It filters any non-firing alerts from the input. // // Copied from Prometheus's main.go. -func SendAlerts(n sender, externalURL string) promRules.NotifyFunc { +func SendAlerts(n sender, generatorURLFn func(expr string) string) promRules.NotifyFunc { return func(ctx context.Context, expr string, alerts ...*promRules.Alert) { var res []*notifier.Alert @@ -515,7 +515,7 @@ func SendAlerts(n sender, externalURL string) promRules.NotifyFunc { StartsAt: alert.FiredAt, Labels: alert.Labels, Annotations: alert.Annotations, - GeneratorURL: externalURL + strutil.TableLinkForExpression(expr), + GeneratorURL: generatorURLFn(expr), } if !alert.ResolvedAt.IsZero() { a.EndsAt = alert.ResolvedAt @@ -531,6 +531,34 @@ func SendAlerts(n sender, externalURL string) promRules.NotifyFunc { } } +// grafanaExploreLink builds a Grafana Explore URL for the given expression. 
+func grafanaExploreLink(baseURL, expr, datasourceUID string, orgID int64) string { + panes := map[string]any{ + "default": map[string]any{ + "datasource": datasourceUID, + "queries": []map[string]any{ + { + "refId": "A", + "expr": expr, + "datasource": map[string]string{"uid": datasourceUID, "type": "prometheus"}, + "editorMode": "code", + }, + }, + "range": map[string]string{ + "from": "now-1h", + "to": "now", + }, + }, + } + panesJSON, _ := json.Marshal(panes) + + return fmt.Sprintf("%s/explore?schemaVersion=1&panes=%s&orgId=%d", + strings.TrimRight(baseURL, "/"), + url.QueryEscape(string(panesJSON)), + orgID, + ) +} + func ruleGroupDisabled(ruleGroup *rulespb.RuleGroupDesc, disabledRuleGroupsForUser validation.DisabledRuleGroups) bool { for _, disabledRuleGroupForUser := range disabledRuleGroupsForUser { if ruleGroup.Namespace == disabledRuleGroupForUser.Namespace && diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index e5738945cb4..6d7bb861920 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -35,6 +35,7 @@ import ( promRules "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/annotations" + "github.com/prometheus/prometheus/util/strutil" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -89,14 +90,18 @@ func defaultRulerConfig(t testing.TB) Config { } type ruleLimits struct { - mtx sync.RWMutex - tenantShard float64 - maxRulesPerRuleGroup int - maxRuleGroups int - disabledRuleGroups validation.DisabledRuleGroups - maxQueryLength time.Duration - queryOffset time.Duration - externalLabels labels.Labels + mtx sync.RWMutex + tenantShard float64 + maxRulesPerRuleGroup int + maxRuleGroups int + disabledRuleGroups validation.DisabledRuleGroups + maxQueryLength time.Duration + queryOffset time.Duration + externalLabels labels.Labels + externalURL string + alertGeneratorURLFormat string + 
grafanaDatasourceUID string + grafanaOrgID int64 } func (r *ruleLimits) setRulerExternalLabels(lset labels.Labels) { @@ -147,6 +152,30 @@ func (r *ruleLimits) RulerExternalLabels(_ string) labels.Labels { return r.externalLabels } +func (r *ruleLimits) RulerExternalURL(_ string) string { + r.mtx.RLock() + defer r.mtx.RUnlock() + return r.externalURL +} + +func (r *ruleLimits) RulerAlertGeneratorURLFormat(_ string) string { + r.mtx.RLock() + defer r.mtx.RUnlock() + return r.alertGeneratorURLFormat +} + +func (r *ruleLimits) RulerGrafanaDatasourceUID(_ string) string { + r.mtx.RLock() + defer r.mtx.RUnlock() + return r.grafanaDatasourceUID +} + +func (r *ruleLimits) RulerGrafanaOrgID(_ string) int64 { + r.mtx.RLock() + defer r.mtx.RUnlock() + return r.grafanaOrgID +} + func newEmptyQueryable() storage.Queryable { return storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) { return emptyQuerier{}, nil @@ -2684,10 +2713,13 @@ func (s senderFunc) Send(alerts ...*notifier.Alert) { func TestSendAlerts(t *testing.T) { testCases := []struct { - in []*promRules.Alert - exp []*notifier.Alert + name string + in []*promRules.Alert + exp []*notifier.Alert + generatorURLFn func(expr string) string }{ { + name: "prometheus format with valid until", in: []*promRules.Alert{ { Labels: labels.FromStrings("l1", "v1"), @@ -2706,8 +2738,12 @@ func TestSendAlerts(t *testing.T) { GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1", }, }, + generatorURLFn: func(expr string) string { + return "http://localhost:9090" + strutil.TableLinkForExpression(expr) + }, }, { + name: "prometheus format with resolved at", in: []*promRules.Alert{ { Labels: labels.FromStrings("l1", "v1"), @@ -2726,21 +2762,56 @@ func TestSendAlerts(t *testing.T) { GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1", }, }, + generatorURLFn: func(expr string) string { + return "http://localhost:9090" + strutil.TableLinkForExpression(expr) + }, }, { - in: []*promRules.Alert{}, + name: 
"empty alerts", + in: []*promRules.Alert{}, + generatorURLFn: func(expr string) string { + return "http://localhost:9090" + strutil.TableLinkForExpression(expr) + }, + }, + { + name: "grafana explore format", + in: []*promRules.Alert{ + { + Labels: labels.FromStrings("l1", "v1"), + Annotations: labels.FromStrings("a2", "v2"), + ActiveAt: time.Unix(1, 0), + FiredAt: time.Unix(2, 0), + ValidUntil: time.Unix(3, 0), + }, + }, + generatorURLFn: func(expr string) string { + return grafanaExploreLink("http://grafana.example.com", expr, "my-datasource", 1) + }, }, } - for i, tc := range testCases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { - senderFunc := senderFunc(func(alerts ...*notifier.Alert) { + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var received []*notifier.Alert + sf := senderFunc(func(alerts ...*notifier.Alert) { if len(tc.in) == 0 { t.Fatalf("sender called with 0 alert") } - require.Equal(t, tc.exp, alerts) + received = alerts + if tc.exp != nil { + require.Equal(t, tc.exp, alerts) + } }) - SendAlerts(senderFunc, "http://localhost:9090")(context.TODO(), "up", tc.in...) + SendAlerts(sf, tc.generatorURLFn)(context.TODO(), "up", tc.in...) 
+ + // Additional checks for grafana explore format + if tc.name == "grafana explore format" { + require.Len(t, received, 1) + require.Contains(t, received[0].GeneratorURL, "/explore?schemaVersion=1&panes=") + require.Contains(t, received[0].GeneratorURL, "orgId=1") + require.Contains(t, received[0].GeneratorURL, "my-datasource") + require.Contains(t, received[0].GeneratorURL, "up") + } }) } } diff --git a/pkg/util/validation/exporter_test.go b/pkg/util/validation/exporter_test.go index 01f96b92750..5e14e37f9ef 100644 --- a/pkg/util/validation/exporter_test.go +++ b/pkg/util/validation/exporter_test.go @@ -107,6 +107,7 @@ func TestOverridesExporter_withConfig(t *testing.T) { cortex_overrides{limit_name="reject_old_samples",user="tenant-a"} 0 cortex_overrides{limit_name="reject_old_samples_max_age",user="tenant-a"} 1.2096e+06 cortex_overrides{limit_name="ruler_evaluation_delay_duration",user="tenant-a"} 0 + cortex_overrides{limit_name="ruler_grafana_org_id",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_max_rule_groups_per_tenant",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_max_rules_per_rule_group",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_query_offset",user="tenant-a"} 0 diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 2f14b5cab8c..3c8a464671d 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -214,7 +214,11 @@ type Limits struct { RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` - RulesPartialData bool `yaml:"rules_partial_data" json:"rules_partial_data" doc:"nocli|description=Enable to allow rules to be evaluated with data from a single zone, if other zones are not 
available.|default=false"` + RulerExternalURL string `yaml:"ruler_external_url" json:"ruler_external_url" doc:"nocli|description=Per-tenant external URL for the ruler. If set, it overrides the global -ruler.external.url for this tenant's alert notifications."` + RulerAlertGeneratorURLFormat string `yaml:"ruler_alert_generator_url_format" json:"ruler_alert_generator_url_format" doc:"nocli|description=Format for alert generator URLs. Supported values: prometheus (default), grafana-explore."` + RulerGrafanaDatasourceUID string `yaml:"ruler_grafana_datasource_uid" json:"ruler_grafana_datasource_uid" doc:"nocli|description=Grafana datasource UID for alert generator URLs when format is grafana-explore."` + RulerGrafanaOrgID int64 `yaml:"ruler_grafana_org_id" json:"ruler_grafana_org_id" doc:"nocli|description=Grafana organization ID for alert generator URLs when format is grafana-explore.|default=1"` + RulesPartialData bool `yaml:"rules_partial_data" json:"rules_partial_data" doc:"nocli|description=Enable to allow rules to be evaluated with data from a single zone, if other zones are not available.|default=false"` // Store-gateway. 
StoreGatewayTenantShardSize float64 `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"` @@ -1144,6 +1148,22 @@ func (o *Overrides) RulerExternalLabels(userID string) labels.Labels { return o.GetOverridesForUser(userID).RulerExternalLabels } +func (o *Overrides) RulerExternalURL(userID string) string { + return o.GetOverridesForUser(userID).RulerExternalURL +} + +func (o *Overrides) RulerAlertGeneratorURLFormat(userID string) string { + return o.GetOverridesForUser(userID).RulerAlertGeneratorURLFormat +} + +func (o *Overrides) RulerGrafanaDatasourceUID(userID string) string { + return o.GetOverridesForUser(userID).RulerGrafanaDatasourceUID +} + +func (o *Overrides) RulerGrafanaOrgID(userID string) int64 { + return o.GetOverridesForUser(userID).RulerGrafanaOrgID +} + // MaxRegexPatternLength returns the maximum length of an unoptimized regex pattern. // This is only used in Ingester. func (o *Overrides) MaxRegexPatternLength(userID string) int { diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json index dfbd85f685c..82c5933f89f 100644 --- a/schemas/cortex-config-schema.json +++ b/schemas/cortex-config-schema.json @@ -5475,6 +5475,10 @@ "x-cli-flag": "validation.reject-old-samples.max-age", "x-format": "duration" }, + "ruler_alert_generator_url_format": { + "description": "Format for alert generator URLs. Supported values: prometheus (default), grafana-explore.", + "type": "string" + }, "ruler_evaluation_delay_duration": { "default": "0s", "description": "Deprecated(use ruler.query-offset instead) and will be removed in v1.19.0: Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.", @@ -5488,6 +5492,19 @@ "description": "external labels for alerting rules", "type": "object" }, + "ruler_external_url": { + "description": "Per-tenant external URL for the ruler. 
If set, it overrides the global -ruler.external.url for this tenant's alert notifications.", + "type": "string" + }, + "ruler_grafana_datasource_uid": { + "description": "Grafana datasource UID for alert generator URLs when format is grafana-explore.", + "type": "string" + }, + "ruler_grafana_org_id": { + "default": 1, + "description": "Grafana organization ID for alert generator URLs when format is grafana-explore.", + "type": "number" + }, "ruler_max_rule_groups_per_tenant": { "default": 0, "description": "Maximum number of rule groups per-tenant. 0 to disable.", From 52b69c6afc0628a9c6f45ac6b7a3b70174834c94 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Fri, 27 Feb 2026 16:15:51 -0800 Subject: [PATCH 02/13] Fix gofmt alignment in ruler and validation packages Co-Authored-By: Claude Opus 4.6 Signed-off-by: Charlie Le --- pkg/ruler/compat.go | 8 ++++---- pkg/ruler/ruler_test.go | 24 ++++++++++++------------ pkg/util/validation/limits.go | 12 ++++++------ 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 2e471ef7f0f..69b4fa0b9eb 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -374,10 +374,10 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi Appendable: NewPusherAppendable(p, userID, overrides, evalMetrics.TotalWritesVec.WithLabelValues(userID), evalMetrics.FailedWritesVec.WithLabelValues(userID)), - Queryable: q, - QueryFunc: queryFunc, - Context: prometheusContext, - ExternalURL: cfg.ExternalURL.URL, + Queryable: q, + QueryFunc: queryFunc, + Context: prometheusContext, + ExternalURL: cfg.ExternalURL.URL, NotifyFunc: SendAlerts(notifier, func(expr string) string { externalURL := cfg.ExternalURL.String() if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index 6d7bb861920..b725008fe1f 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -90,18 +90,18 @@ func 
defaultRulerConfig(t testing.TB) Config { } type ruleLimits struct { - mtx sync.RWMutex - tenantShard float64 - maxRulesPerRuleGroup int - maxRuleGroups int - disabledRuleGroups validation.DisabledRuleGroups - maxQueryLength time.Duration - queryOffset time.Duration - externalLabels labels.Labels - externalURL string - alertGeneratorURLFormat string - grafanaDatasourceUID string - grafanaOrgID int64 + mtx sync.RWMutex + tenantShard float64 + maxRulesPerRuleGroup int + maxRuleGroups int + disabledRuleGroups validation.DisabledRuleGroups + maxQueryLength time.Duration + queryOffset time.Duration + externalLabels labels.Labels + externalURL string + alertGeneratorURLFormat string + grafanaDatasourceUID string + grafanaOrgID int64 } func (r *ruleLimits) setRulerExternalLabels(lset labels.Labels) { diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 3c8a464671d..3aaca4169f4 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -208,12 +208,12 @@ type Limits struct { QueryRejection QueryRejection `yaml:"query_rejection" json:"query_rejection" doc:"nocli|description=Configuration for query rejection."` // Ruler defaults and limits. 
- RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` - RulerTenantShardSize float64 `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` - RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` - RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` - RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` - RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` + RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` + RulerTenantShardSize float64 `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` + RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` + RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` + RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` + RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` RulerExternalURL string `yaml:"ruler_external_url" json:"ruler_external_url" doc:"nocli|description=Per-tenant external URL for the ruler. If set, it overrides the global -ruler.external.url for this tenant's alert notifications."` RulerAlertGeneratorURLFormat string `yaml:"ruler_alert_generator_url_format" json:"ruler_alert_generator_url_format" doc:"nocli|description=Format for alert generator URLs. 
Supported values: prometheus (default), grafana-explore."` RulerGrafanaDatasourceUID string `yaml:"ruler_grafana_datasource_uid" json:"ruler_grafana_datasource_uid" doc:"nocli|description=Grafana datasource UID for alert generator URLs when format is grafana-explore."` From 41b7edb76b88a6ce6ada6c148d29d2cfdba60221 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 11:51:19 -0700 Subject: [PATCH 03/13] Replace Grafana-specific config with generic Go template for alert generator URLs Replace the 3 Grafana-specific per-tenant config fields (ruler_alert_generator_url_format, ruler_grafana_datasource_uid, ruler_grafana_org_id) with a single generic field: ruler_alert_generator_url_template. This field accepts a Go text/template string with .ExternalURL and .Expression variables, plus built-in functions like urlquery. Users can construct any URL format (Grafana, Perses, etc.) without Cortex needing to understand specific UI formats. The ruler_external_url per-tenant override and SendAlerts signature (func(expr string) string) are kept unchanged. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- docs/configuration/config-file-reference.md | 17 ++- pkg/ruler/compat.go | 21 ++-- pkg/ruler/ruler.go | 47 ++++----- pkg/ruler/ruler_test.go | 111 +++++++++++++------- pkg/util/validation/exporter_test.go | 1 - pkg/util/validation/limits.go | 39 ++++--- schemas/cortex-config-schema.json | 13 +-- 7 files changed, 131 insertions(+), 118 deletions(-) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 968ca55bb04..af8a0abdcab 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -4333,17 +4333,12 @@ query_rejection: # -ruler.external.url for this tenant's alert notifications. [ruler_external_url: | default = ""] -# Format for alert generator URLs. Supported values: prometheus (default), -# grafana-explore. 
-[ruler_alert_generator_url_format: | default = ""] - -# Grafana datasource UID for alert generator URLs when format is -# grafana-explore. -[ruler_grafana_datasource_uid: | default = ""] - -# Grafana organization ID for alert generator URLs when format is -# grafana-explore. -[ruler_grafana_org_id: | default = 1] +# Go text/template for alert generator URLs. Available variables: .ExternalURL +# (resolved external URL) and .Expression (PromQL expression). Built-in +# functions like urlquery are available. If empty, uses default Prometheus +# /graph format. Example for a custom explore link: +# "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}" +[ruler_alert_generator_url_template: | default = ""] # Enable to allow rules to be evaluated with data from a single zone, if other # zones are not available. diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 69b4fa0b9eb..b34ff795270 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -166,9 +166,7 @@ type RulesLimits interface { DisabledRuleGroups(userID string) validation.DisabledRuleGroups RulerExternalLabels(userID string) labels.Labels RulerExternalURL(userID string) string - RulerAlertGeneratorURLFormat(userID string) string - RulerGrafanaDatasourceUID(userID string) string - RulerGrafanaOrgID(userID string) int64 + RulerAlertGeneratorURLTemplate(userID string) string } type QueryExecutor func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) @@ -383,15 +381,16 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { externalURL = tenantURL } - if overrides.RulerAlertGeneratorURLFormat(userID) == "grafana-explore" { - datasourceUID := overrides.RulerGrafanaDatasourceUID(userID) - orgID := overrides.RulerGrafanaOrgID(userID) - if orgID == 0 { - orgID = 1 - } - return grafanaExploreLink(externalURL, expr, datasourceUID, orgID) + tmplStr := 
overrides.RulerAlertGeneratorURLTemplate(userID) + if tmplStr == "" { + return externalURL + strutil.TableLinkForExpression(expr) } - return externalURL + strutil.TableLinkForExpression(expr) + result, err := executeGeneratorURLTemplate(tmplStr, externalURL, expr) + if err != nil { + level.Warn(logger).Log("msg", "failed to execute generator URL template, falling back to prometheus format", "err", err) + return externalURL + strutil.TableLinkForExpression(expr) + } + return result }), Logger: util_log.GoKitLogToSlog(log.With(logger, "user", userID)), Registerer: reg, diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index b6db6ec886a..c4d2dd0a898 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -1,8 +1,8 @@ package ruler import ( + "bytes" "context" - "encoding/json" "flag" "fmt" "hash/fnv" @@ -13,6 +13,7 @@ import ( "sort" "strings" "sync" + "text/template" "time" "github.com/go-kit/log" @@ -531,32 +532,26 @@ func SendAlerts(n sender, generatorURLFn func(expr string) string) promRules.Not } } -// grafanaExploreLink builds a Grafana Explore URL for the given expression. -func grafanaExploreLink(baseURL, expr, datasourceUID string, orgID int64) string { - panes := map[string]any{ - "default": map[string]any{ - "datasource": datasourceUID, - "queries": []map[string]any{ - { - "refId": "A", - "expr": expr, - "datasource": map[string]string{"uid": datasourceUID, "type": "prometheus"}, - "editorMode": "code", - }, - }, - "range": map[string]string{ - "from": "now-1h", - "to": "now", - }, - }, - } - panesJSON, _ := json.Marshal(panes) +// generatorURLTemplateData holds the variables available in generator URL templates. +type generatorURLTemplateData struct { + ExternalURL string + Expression string +} - return fmt.Sprintf("%s/explore?schemaVersion=1&panes=%s&orgId=%d", - strings.TrimRight(baseURL, "/"), - url.QueryEscape(string(panesJSON)), - orgID, - ) +// executeGeneratorURLTemplate executes a Go text/template to produce a generator URL. 
+func executeGeneratorURLTemplate(tmplStr, externalURL, expr string) (string, error) { + tmpl, err := template.New("generator_url").Parse(tmplStr) + if err != nil { + return "", err + } + var buf bytes.Buffer + if err := tmpl.Execute(&buf, generatorURLTemplateData{ + ExternalURL: externalURL, + Expression: expr, + }); err != nil { + return "", err + } + return buf.String(), nil } func ruleGroupDisabled(ruleGroup *rulespb.RuleGroupDesc, disabledRuleGroupsForUser validation.DisabledRuleGroups) bool { diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index b725008fe1f..a305a8ec39c 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -90,18 +90,16 @@ func defaultRulerConfig(t testing.TB) Config { } type ruleLimits struct { - mtx sync.RWMutex - tenantShard float64 - maxRulesPerRuleGroup int - maxRuleGroups int - disabledRuleGroups validation.DisabledRuleGroups - maxQueryLength time.Duration - queryOffset time.Duration - externalLabels labels.Labels - externalURL string - alertGeneratorURLFormat string - grafanaDatasourceUID string - grafanaOrgID int64 + mtx sync.RWMutex + tenantShard float64 + maxRulesPerRuleGroup int + maxRuleGroups int + disabledRuleGroups validation.DisabledRuleGroups + maxQueryLength time.Duration + queryOffset time.Duration + externalLabels labels.Labels + externalURL string + alertGeneratorURLTemplate string } func (r *ruleLimits) setRulerExternalLabels(lset labels.Labels) { @@ -158,22 +156,10 @@ func (r *ruleLimits) RulerExternalURL(_ string) string { return r.externalURL } -func (r *ruleLimits) RulerAlertGeneratorURLFormat(_ string) string { +func (r *ruleLimits) RulerAlertGeneratorURLTemplate(_ string) string { r.mtx.RLock() defer r.mtx.RUnlock() - return r.alertGeneratorURLFormat -} - -func (r *ruleLimits) RulerGrafanaDatasourceUID(_ string) string { - r.mtx.RLock() - defer r.mtx.RUnlock() - return r.grafanaDatasourceUID -} - -func (r *ruleLimits) RulerGrafanaOrgID(_ string) int64 { - r.mtx.RLock() - defer 
r.mtx.RUnlock() - return r.grafanaOrgID + return r.alertGeneratorURLTemplate } func newEmptyQueryable() storage.Queryable { @@ -2774,7 +2760,7 @@ func TestSendAlerts(t *testing.T) { }, }, { - name: "grafana explore format", + name: "custom template format", in: []*promRules.Alert{ { Labels: labels.FromStrings("l1", "v1"), @@ -2784,33 +2770,84 @@ func TestSendAlerts(t *testing.T) { ValidUntil: time.Unix(3, 0), }, }, + exp: []*notifier.Alert{ + { + Labels: labels.FromStrings("l1", "v1"), + Annotations: labels.FromStrings("a2", "v2"), + StartsAt: time.Unix(2, 0), + EndsAt: time.Unix(3, 0), + GeneratorURL: "http://grafana.example.com/explore?expr=up", + }, + }, generatorURLFn: func(expr string) string { - return grafanaExploreLink("http://grafana.example.com", expr, "my-datasource", 1) + result, _ := executeGeneratorURLTemplate( + "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}", + "http://grafana.example.com", expr) + return result }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - var received []*notifier.Alert sf := senderFunc(func(alerts ...*notifier.Alert) { if len(tc.in) == 0 { t.Fatalf("sender called with 0 alert") } - received = alerts if tc.exp != nil { require.Equal(t, tc.exp, alerts) } }) SendAlerts(sf, tc.generatorURLFn)(context.TODO(), "up", tc.in...) 
+ }) + } +} - // Additional checks for grafana explore format - if tc.name == "grafana explore format" { - require.Len(t, received, 1) - require.Contains(t, received[0].GeneratorURL, "/explore?schemaVersion=1&panes=") - require.Contains(t, received[0].GeneratorURL, "orgId=1") - require.Contains(t, received[0].GeneratorURL, "my-datasource") - require.Contains(t, received[0].GeneratorURL, "up") +func TestExecuteGeneratorURLTemplate(t *testing.T) { + testCases := []struct { + name string + tmplStr string + externalURL string + expr string + expected string + expectErr bool + }{ + { + name: "basic template with expression", + tmplStr: "{{ .ExternalURL }}/graph?expr={{ .Expression }}", + externalURL: "http://prometheus:9090", + expr: "up", + expected: "http://prometheus:9090/graph?expr=up", + }, + { + name: "template with urlquery", + tmplStr: "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}", + externalURL: "http://grafana.example.com", + expr: "rate(http_requests_total[5m])", + expected: "http://grafana.example.com/explore?expr=rate%28http_requests_total%5B5m%5D%29", + }, + { + name: "invalid template returns error", + tmplStr: "{{ .Invalid", + expectErr: true, + }, + { + name: "template with multiple variables", + tmplStr: "{{ .ExternalURL }}/explore?left=%7B%22queries%22:%5B%7B%22expr%22:%22{{ urlquery .Expression }}%22%7D%5D%7D", + externalURL: "http://grafana:3000", + expr: "up", + expected: "http://grafana:3000/explore?left=%7B%22queries%22:%5B%7B%22expr%22:%22up%22%7D%5D%7D", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := executeGeneratorURLTemplate(tc.tmplStr, tc.externalURL, tc.expr) + if tc.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, result) } }) } diff --git a/pkg/util/validation/exporter_test.go b/pkg/util/validation/exporter_test.go index 5e14e37f9ef..01f96b92750 100644 --- a/pkg/util/validation/exporter_test.go +++ 
b/pkg/util/validation/exporter_test.go @@ -107,7 +107,6 @@ func TestOverridesExporter_withConfig(t *testing.T) { cortex_overrides{limit_name="reject_old_samples",user="tenant-a"} 0 cortex_overrides{limit_name="reject_old_samples_max_age",user="tenant-a"} 1.2096e+06 cortex_overrides{limit_name="ruler_evaluation_delay_duration",user="tenant-a"} 0 - cortex_overrides{limit_name="ruler_grafana_org_id",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_max_rule_groups_per_tenant",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_max_rules_per_rule_group",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_query_offset",user="tenant-a"} 0 diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 3aaca4169f4..2a2d661c758 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -10,6 +10,7 @@ import ( "math" "regexp" "strings" + "text/template" "time" "github.com/cespare/xxhash/v2" @@ -208,17 +209,15 @@ type Limits struct { QueryRejection QueryRejection `yaml:"query_rejection" json:"query_rejection" doc:"nocli|description=Configuration for query rejection."` // Ruler defaults and limits. - RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` - RulerTenantShardSize float64 `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` - RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` - RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` - RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` - RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` - RulerExternalURL string `yaml:"ruler_external_url" json:"ruler_external_url" doc:"nocli|description=Per-tenant external URL for the ruler. 
If set, it overrides the global -ruler.external.url for this tenant's alert notifications."` - RulerAlertGeneratorURLFormat string `yaml:"ruler_alert_generator_url_format" json:"ruler_alert_generator_url_format" doc:"nocli|description=Format for alert generator URLs. Supported values: prometheus (default), grafana-explore."` - RulerGrafanaDatasourceUID string `yaml:"ruler_grafana_datasource_uid" json:"ruler_grafana_datasource_uid" doc:"nocli|description=Grafana datasource UID for alert generator URLs when format is grafana-explore."` - RulerGrafanaOrgID int64 `yaml:"ruler_grafana_org_id" json:"ruler_grafana_org_id" doc:"nocli|description=Grafana organization ID for alert generator URLs when format is grafana-explore.|default=1"` - RulesPartialData bool `yaml:"rules_partial_data" json:"rules_partial_data" doc:"nocli|description=Enable to allow rules to be evaluated with data from a single zone, if other zones are not available.|default=false"` + RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` + RulerTenantShardSize float64 `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` + RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` + RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` + RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` + RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` + RulerExternalURL string `yaml:"ruler_external_url" json:"ruler_external_url" doc:"nocli|description=Per-tenant external URL for the ruler. 
If set, it overrides the global -ruler.external.url for this tenant's alert notifications."` + RulerAlertGeneratorURLTemplate string `yaml:"ruler_alert_generator_url_template" json:"ruler_alert_generator_url_template" doc:"nocli|description=Go text/template for alert generator URLs. Available variables: .ExternalURL (resolved external URL) and .Expression (PromQL expression). Built-in functions like urlquery are available. If empty, uses default Prometheus /graph format."` + RulesPartialData bool `yaml:"rules_partial_data" json:"rules_partial_data" doc:"nocli|description=Enable to allow rules to be evaluated with data from a single zone, if other zones are not available.|default=false"` // Store-gateway. StoreGatewayTenantShardSize float64 `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"` @@ -416,6 +415,12 @@ func (l *Limits) Validate(nameValidationScheme model.ValidationScheme, shardByAl } } + if l.RulerAlertGeneratorURLTemplate != "" { + if _, err := template.New("").Parse(l.RulerAlertGeneratorURLTemplate); err != nil { + return fmt.Errorf("invalid ruler_alert_generator_url_template: %w", err) + } + } + return nil } @@ -1152,16 +1157,8 @@ func (o *Overrides) RulerExternalURL(userID string) string { return o.GetOverridesForUser(userID).RulerExternalURL } -func (o *Overrides) RulerAlertGeneratorURLFormat(userID string) string { - return o.GetOverridesForUser(userID).RulerAlertGeneratorURLFormat -} - -func (o *Overrides) RulerGrafanaDatasourceUID(userID string) string { - return o.GetOverridesForUser(userID).RulerGrafanaDatasourceUID -} - -func (o *Overrides) RulerGrafanaOrgID(userID string) int64 { - return o.GetOverridesForUser(userID).RulerGrafanaOrgID +func (o *Overrides) RulerAlertGeneratorURLTemplate(userID string) string { + return o.GetOverridesForUser(userID).RulerAlertGeneratorURLTemplate } // MaxRegexPatternLength returns the maximum length of an unoptimized regex pattern. 
diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json index 82c5933f89f..47629274b2a 100644 --- a/schemas/cortex-config-schema.json +++ b/schemas/cortex-config-schema.json @@ -5475,8 +5475,8 @@ "x-cli-flag": "validation.reject-old-samples.max-age", "x-format": "duration" }, - "ruler_alert_generator_url_format": { - "description": "Format for alert generator URLs. Supported values: prometheus (default), grafana-explore.", + "ruler_alert_generator_url_template": { + "description": "Go text/template for alert generator URLs. Available variables: .ExternalURL (resolved external URL) and .Expression (PromQL expression). Built-in functions like urlquery are available. If empty, uses default Prometheus /graph format.", "type": "string" }, "ruler_evaluation_delay_duration": { @@ -5496,15 +5496,6 @@ "description": "Per-tenant external URL for the ruler. If set, it overrides the global -ruler.external.url for this tenant's alert notifications.", "type": "string" }, - "ruler_grafana_datasource_uid": { - "description": "Grafana datasource UID for alert generator URLs when format is grafana-explore.", - "type": "string" - }, - "ruler_grafana_org_id": { - "default": 1, - "description": "Grafana organization ID for alert generator URLs when format is grafana-explore.", - "type": "number" - }, "ruler_max_rule_groups_per_tenant": { "default": 0, "description": "Maximum number of rule groups per-tenant. 0 to disable.", From 8b89f9bc2f2748c0638e4dbb727fed33b425e30f Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 11:58:44 -0700 Subject: [PATCH 04/13] Add getting-started example for per-tenant alert generator URL templates Add per-tenant Alertmanager datasources (tenant-a, tenant-b) to Grafana provisioning so alerts are visible in Grafana's alerting UI. 
Add runtime-config.yaml with per-tenant overrides: - tenant-a: Grafana Explore URL template with full pane JSON - tenant-b: Perses explore URL template with PrometheusTimeSeriesQuery Update Perses from v0.49 to v0.53.1 and enable the explorer feature (frontend.explorer.enable: true). Rename project from "default" to "cortex" to match template URLs. Add Step 7 to the getting-started guide with instructions for: - Configuring per-tenant alert generator URL templates - Loading alertmanager configs and demo alert rules - Viewing alerts in Grafana at /alerting/groups?groupBy=alertname - Verifying generator URLs via the API Also configure ruler.alertmanager_url and ruler.external_url, and set an explicit UID on the Grafana Cortex datasource for use in templates. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- docs/getting-started/.env | 2 +- docs/getting-started/cortex-config.yaml | 8 ++ docs/getting-started/docker-compose.yaml | 3 + .../grafana-datasource-docker.yaml | 23 ++++ docs/getting-started/perses/config.yaml | 7 +- .../perses/dashboards/cortex-writes.yaml | 2 +- .../perses/datasource-tenant-a.yaml | 14 ++ .../perses/datasource-tenant-b.yaml | 14 ++ docs/getting-started/perses/project.yaml | 4 +- docs/getting-started/runtime-config.yaml | 25 ++++ docs/getting-started/single-binary.md | 128 ++++++++++++++++++ 11 files changed, 225 insertions(+), 5 deletions(-) create mode 100644 docs/getting-started/perses/datasource-tenant-a.yaml create mode 100644 docs/getting-started/perses/datasource-tenant-b.yaml create mode 100644 docs/getting-started/runtime-config.yaml diff --git a/docs/getting-started/.env b/docs/getting-started/.env index 52b62bd990b..81b6cc44d5b 100644 --- a/docs/getting-started/.env +++ b/docs/getting-started/.env @@ -2,4 +2,4 @@ CORTEX_VERSION=v1.20.1 GRAFANA_VERSION=10.4.2 PROMETHEUS_VERSION=v3.2.1 SEAWEEDFS_VERSION=3.67 -PERSES_VERSION=v0.49-distroless-debug +PERSES_VERSION=v0.53.1-distroless-debug diff --git 
a/docs/getting-started/cortex-config.yaml b/docs/getting-started/cortex-config.yaml index 1b24084ad3f..9351b788f18 100644 --- a/docs/getting-started/cortex-config.yaml +++ b/docs/getting-started/cortex-config.yaml @@ -82,6 +82,14 @@ frontend_worker: # https://cortexmetrics.io/docs/configuration/configuration-file/#ruler_config ruler: enable_api: true + external_url: http://localhost:9009 + alertmanager_url: http://localhost:9009/alertmanager + +# Per-tenant runtime configuration (hot-reloaded without restart). +# This file configures per-tenant overrides such as custom alert generator +# URL templates for Grafana, Perses, or any metrics explorer. +runtime_config: + file: /config/runtime-config.yaml # https://cortexmetrics.io/docs/configuration/configuration-file/#ruler_storage_config ruler_storage: diff --git a/docs/getting-started/docker-compose.yaml b/docs/getting-started/docker-compose.yaml index 1c48394b16c..47ac1d7e2e3 100644 --- a/docs/getting-started/docker-compose.yaml +++ b/docs/getting-started/docker-compose.yaml @@ -17,6 +17,7 @@ services: - -config.file=/config/cortex-config.yaml volumes: - ./cortex-config.yaml:/config/cortex-config.yaml:ro + - ./runtime-config.yaml:/config/runtime-config.yaml:ro ports: - "9009:9009" healthcheck: @@ -47,6 +48,8 @@ services: volumes: - ./perses/config.yaml:/etc/perses/config/config.yaml:ro - ./perses/datasource.yaml:/etc/perses/resources/datasource.yaml:ro + - ./perses/datasource-tenant-a.yaml:/etc/perses/resources/datasource-tenant-a.yaml:ro + - ./perses/datasource-tenant-b.yaml:/etc/perses/resources/datasource-tenant-b.yaml:ro - ./perses/project.yaml:/etc/perses/resources/project.yaml:ro - ./perses/dashboards/cortex-writes.yaml:/etc/perses/resources/cortex-writes.yaml:ro prometheus: diff --git a/docs/getting-started/grafana-datasource-docker.yaml b/docs/getting-started/grafana-datasource-docker.yaml index a40cce5e65f..2087d9f237d 100644 --- a/docs/getting-started/grafana-datasource-docker.yaml +++ 
b/docs/getting-started/grafana-datasource-docker.yaml @@ -5,6 +5,7 @@ apiVersion: 1 datasources: - name: Cortex type: prometheus + uid: cortex access: proxy orgId: 1 url: http://cortex:9009/api/prom @@ -71,3 +72,25 @@ datasources: secureJsonData: httpHeaderValue1: cortex version: 1 + - orgId: 1 + name: Tenant A Alertmanager + type: alertmanager + access: proxy + url: http://cortex:9009/ + jsonData: + httpHeaderName1: X-Scope-OrgID + implementation: cortex + secureJsonData: + httpHeaderValue1: tenant-a + version: 1 + - orgId: 1 + name: Tenant B Alertmanager + type: alertmanager + access: proxy + url: http://cortex:9009/ + jsonData: + httpHeaderName1: X-Scope-OrgID + implementation: cortex + secureJsonData: + httpHeaderValue1: tenant-b + version: 1 diff --git a/docs/getting-started/perses/config.yaml b/docs/getting-started/perses/config.yaml index b87f81bc0f6..ba04acce34e 100644 --- a/docs/getting-started/perses/config.yaml +++ b/docs/getting-started/perses/config.yaml @@ -8,7 +8,7 @@ security: database: file: extension: yaml - folder: /perses + folder: /tmp/perses-data schemas: datasources_path: /etc/perses/cue/schemas/datasources @@ -16,6 +16,11 @@ schemas: panels_path: /etc/perses/cue/schemas/panels queries_path: /etc/perses/cue/schemas/queries variables_path: /etc/perses/cue/schemas/variables + +frontend: + explorer: + enable: true + provisioning: folders: - /etc/perses/resources \ No newline at end of file diff --git a/docs/getting-started/perses/dashboards/cortex-writes.yaml b/docs/getting-started/perses/dashboards/cortex-writes.yaml index 8705ad5f556..a7de3b2795b 100644 --- a/docs/getting-started/perses/dashboards/cortex-writes.yaml +++ b/docs/getting-started/perses/dashboards/cortex-writes.yaml @@ -4,7 +4,7 @@ metadata: createdAt: 2025-03-24T19:15:47.468680767Z updatedAt: 2025-03-24T19:43:53.000136362Z version: 12 - project: default + project: cortex spec: display: name: Cortex / Writes diff --git a/docs/getting-started/perses/datasource-tenant-a.yaml 
b/docs/getting-started/perses/datasource-tenant-a.yaml new file mode 100644 index 00000000000..78d67370828 --- /dev/null +++ b/docs/getting-started/perses/datasource-tenant-a.yaml @@ -0,0 +1,14 @@ +kind: GlobalDatasource +metadata: + name: TenantA +spec: + default: false + plugin: + kind: PrometheusDatasource + spec: + proxy: + kind: HTTPProxy + spec: + url: http://cortex:9009/api/prom + headers: + X-Scope-OrgID: tenant-a diff --git a/docs/getting-started/perses/datasource-tenant-b.yaml b/docs/getting-started/perses/datasource-tenant-b.yaml new file mode 100644 index 00000000000..40f80a67492 --- /dev/null +++ b/docs/getting-started/perses/datasource-tenant-b.yaml @@ -0,0 +1,14 @@ +kind: GlobalDatasource +metadata: + name: TenantB +spec: + default: false + plugin: + kind: PrometheusDatasource + spec: + proxy: + kind: HTTPProxy + spec: + url: http://cortex:9009/api/prom + headers: + X-Scope-OrgID: tenant-b diff --git a/docs/getting-started/perses/project.yaml b/docs/getting-started/perses/project.yaml index a39681c7841..3b1a1ad9835 100644 --- a/docs/getting-started/perses/project.yaml +++ b/docs/getting-started/perses/project.yaml @@ -1,6 +1,6 @@ kind: Project metadata: - name: default + name: cortex spec: display: - name: "default" \ No newline at end of file + name: "Cortex" \ No newline at end of file diff --git a/docs/getting-started/runtime-config.yaml b/docs/getting-started/runtime-config.yaml new file mode 100644 index 00000000000..487cc9864b1 --- /dev/null +++ b/docs/getting-started/runtime-config.yaml @@ -0,0 +1,25 @@ +# Runtime configuration with per-tenant overrides. +# This file is hot-reloaded by Cortex without requiring a restart. +# +# The examples below demonstrate per-tenant alert generator URL templates. +# Each tenant can have a different URL format for alert "Source" links. + +overrides: + # Tenant using Grafana Explore for alert generator URLs. 
+ # Clicking "Source" on an alert in Alertmanager opens Grafana Explore + # with the PromQL expression pre-filled. + tenant-a: + ruler_external_url: "http://localhost:3000" + ruler_alert_generator_url_template: >- + {{ .ExternalURL }}/explore?schemaVersion=1&panes=%7B%22default%22:%7B%22datasource%22:%22cortex%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22{{ urlquery .Expression }}%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D%7D&orgId=1 + + # Tenant using Perses for alert generator URLs. + # Clicking "Source" on an alert opens Perses explore view with + # the PromQL expression pre-filled and the TenantB datasource selected. + tenant-b: + ruler_external_url: http://localhost:8080 + ruler_alert_generator_url_template: >- + {{ .ExternalURL }}/explore?explorer=Prometheus-PrometheusExplorer&data=%7B%22tab%22%3A%22graph%22%2C%22queries%22%3A%5B%7B%22kind%22%3A%22TimeSeriesQuery%22%2C%22spec%22%3A%7B%22plugin%22%3A%7B%22kind%22%3A%22PrometheusTimeSeriesQuery%22%2C%22spec%22%3A%7B%22datasource%22%3A%7B%22kind%22%3A%22PrometheusDatasource%22%2C%22name%22%3A%22tenantb%22%7D%2C%22query%22%3A%22{{ urlquery .Expression }}%22%7D%7D%7D%7D%5D%7D + + # Tenants without overrides use the global ruler.external.url + # and the default Prometheus /graph format. diff --git a/docs/getting-started/single-binary.md b/docs/getting-started/single-binary.md index 6321a1c238e..4b7c93ceb14 100644 --- a/docs/getting-started/single-binary.md +++ b/docs/getting-started/single-binary.md @@ -214,6 +214,133 @@ docker run --network cortex-docs-getting-started_default \ Configure Alertmanager notification policies in Grafana: [Alerting → Notification policies](http://localhost:3000/alerting/notifications?search=&alertmanager=Cortex%20Alertmanager) +## Step 7: Per-Tenant Alert Generator URLs (Optional) + +Cortex supports customizing the "Source" link on alerts per-tenant using Go `text/template` strings. 
This lets each tenant's alerts link back to their preferred metrics explorer — Grafana Explore, Perses, or any other tool. + +The getting-started example includes a `runtime-config.yaml` with two tenant configurations: +- **tenant-a**: Alert source links point to **Grafana Explore** +- **tenant-b**: Alert source links point to **Perses** + +### How It Works + +The `ruler_alert_generator_url_template` field accepts a Go template with two variables: +- `{{ .ExternalURL }}` — the resolved external URL for this tenant (set via `ruler_external_url`) +- `{{ .Expression }}` — the PromQL expression that triggered the alert + +Built-in Go template functions like `urlquery` are available for URL encoding. + +Example for Grafana Explore: +```yaml +ruler_external_url: "http://localhost:3000" +ruler_alert_generator_url_template: >- + {{ .ExternalURL }}/explore?expr={{ urlquery .Expression }} +``` + +### Try It Out + +1. **Load alertmanager configs** for tenant-a and tenant-b: + +```sh +# Upload alertmanager config for tenant-a +curl -X POST http://localhost:9009/api/v1/alerts \ + -H "X-Scope-OrgID: tenant-a" \ + -H "Content-Type: application/yaml" \ + --data-binary @- <<'EOF' +alertmanager_config: | + receivers: + - name: default-receiver + route: + receiver: default-receiver + group_wait: 5s + group_interval: 10s +EOF + +# Upload alertmanager config for tenant-b +curl -X POST http://localhost:9009/api/v1/alerts \ + -H "X-Scope-OrgID: tenant-b" \ + -H "Content-Type: application/yaml" \ + --data-binary @- <<'EOF' +alertmanager_config: | + receivers: + - name: default-receiver + route: + receiver: default-receiver + group_wait: 5s + group_interval: 10s +EOF +``` + +2. 
**Load demo alert rules** that fire immediately: + +```sh +# Alert rules for tenant-a +curl -X POST http://localhost:9009/api/v1/rules/demo \ + -H "X-Scope-OrgID: tenant-a" \ + -H "Content-Type: application/yaml" \ + --data-binary @- <<'EOF' +name: demo_alerts +interval: 10s +rules: + - alert: HighMemoryUsage + expr: vector(85) > 80 + for: 0m + labels: + severity: warning + annotations: + summary: "Memory usage is above 80%" + - alert: HighErrorRate + expr: vector(5.2) > 5 + for: 0m + labels: + severity: critical + annotations: + summary: "Error rate exceeds 5%" +EOF + +# Alert rules for tenant-b +curl -X POST http://localhost:9009/api/v1/rules/demo \ + -H "X-Scope-OrgID: tenant-b" \ + -H "Content-Type: application/yaml" \ + --data-binary @- <<'EOF' +name: demo_alerts +interval: 10s +rules: + - alert: DiskSpaceLow + expr: vector(92) > 90 + for: 0m + labels: + severity: critical + annotations: + summary: "Disk space usage above 90%" + - alert: HighLatency + expr: vector(3.5) > 2 + for: 0m + labels: + severity: warning + annotations: + summary: "P99 latency exceeds 2s" +EOF +``` + +3. **Wait ~30 seconds** for the ruler to evaluate rules and send alerts to the alertmanager. + +4. **View alerts in Grafana** at [Alerting → Alert groups](http://localhost:3000/alerting/groups?groupBy=alertname): + - Select the **Tenant A Alertmanager** datasource — click "See source" to open Grafana Explore + - Select the **Tenant B Alertmanager** datasource — click "See source" to open Perses + +5. 
**Verify generator URLs** via the API: + +```sh +# Tenant A: Grafana Explore URLs +curl -s "http://localhost:9009/alertmanager/api/v2/alerts" \ + -H "X-Scope-OrgID: tenant-a" | jq '.[].generatorURL' + +# Tenant B: Perses URLs +curl -s "http://localhost:9009/alertmanager/api/v2/alerts" \ + -H "X-Scope-OrgID: tenant-b" | jq '.[].generatorURL' +``` + ## Explore and Experiment Now that everything is running, try these experiments to learn how Cortex works: @@ -306,6 +433,7 @@ This setup uses several configuration files. Here's what each does: |----------------------------------|---------------------------------------------------------------| | `docker-compose.yaml` | Defines all services (Cortex, Prometheus, Grafana, SeaweedFS) | | `cortex-config.yaml` | Cortex configuration (storage, limits, components) | +| `runtime-config.yaml` | Per-tenant runtime overrides (alert generator URL templates) | | `prometheus-config.yaml` | Prometheus configuration with remote_write to Cortex | | `grafana-datasource-docker.yaml` | Grafana datasource pointing to Cortex | | `rules.yaml` | Example recording rules | From 41f462adcdb7046628da623dbd757b02482742fe Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Fri, 27 Feb 2026 14:07:38 -0800 Subject: [PATCH 05/13] Add per-tenant Grafana Explore URL format for alert GeneratorURL Add support for tenants to configure alert GeneratorURL to use Grafana Explore format instead of the default Prometheus /graph format. This is controlled by three new per-tenant settings: ruler_alert_generator_url_format, ruler_grafana_datasource_uid, and ruler_grafana_org_id. 
Co-Authored-By: Claude Opus 4.6 Signed-off-by: Charlie Le --- docs/configuration/config-file-reference.md | 16 +++ pkg/ruler/compat.go | 21 +++- pkg/ruler/external_url.go | 56 +++++++++++ pkg/ruler/external_url_test.go | 67 +++++++++++++ pkg/ruler/manager.go | 13 ++- pkg/ruler/ruler.go | 34 ++++++- pkg/ruler/ruler_test.go | 103 +++++++++++++++++--- pkg/util/validation/exporter_test.go | 1 + pkg/util/validation/limits.go | 22 ++++- schemas/cortex-config-schema.json | 17 ++++ 10 files changed, 326 insertions(+), 24 deletions(-) create mode 100644 pkg/ruler/external_url.go create mode 100644 pkg/ruler/external_url_test.go diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index febb0c5bfdb..ae8260c92b9 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -4335,6 +4335,22 @@ query_rejection: # external labels for alerting rules [ruler_external_labels: | default = []] +# Per-tenant external URL for the ruler. If set, it overrides the global +# -ruler.external.url for this tenant's alert notifications. +[ruler_external_url: | default = ""] + +# Format for alert generator URLs. Supported values: prometheus (default), +# grafana-explore. +[ruler_alert_generator_url_format: | default = ""] + +# Grafana datasource UID for alert generator URLs when format is +# grafana-explore. +[ruler_grafana_datasource_uid: | default = ""] + +# Grafana organization ID for alert generator URLs when format is +# grafana-explore. +[ruler_grafana_org_id: | default = 1] + # Enable to allow rules to be evaluated with data from a single zone, if other # zones are not available. 
[rules_partial_data: | default = false] diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 0dc5c0210eb..2e471ef7f0f 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -19,6 +19,7 @@ import ( "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/strutil" "github.com/weaveworks/common/httpgrpc" "github.com/weaveworks/common/user" @@ -164,6 +165,10 @@ type RulesLimits interface { RulerQueryOffset(userID string) time.Duration DisabledRuleGroups(userID string) validation.DisabledRuleGroups RulerExternalLabels(userID string) labels.Labels + RulerExternalURL(userID string) string + RulerAlertGeneratorURLFormat(userID string) string + RulerGrafanaDatasourceUID(userID string) string + RulerGrafanaOrgID(userID string) int64 } type QueryExecutor func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) @@ -373,7 +378,21 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi QueryFunc: queryFunc, Context: prometheusContext, ExternalURL: cfg.ExternalURL.URL, - NotifyFunc: SendAlerts(notifier, cfg.ExternalURL.URL.String()), + NotifyFunc: SendAlerts(notifier, func(expr string) string { + externalURL := cfg.ExternalURL.String() + if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { + externalURL = tenantURL + } + if overrides.RulerAlertGeneratorURLFormat(userID) == "grafana-explore" { + datasourceUID := overrides.RulerGrafanaDatasourceUID(userID) + orgID := overrides.RulerGrafanaOrgID(userID) + if orgID == 0 { + orgID = 1 + } + return grafanaExploreLink(externalURL, expr, datasourceUID, orgID) + } + return externalURL + strutil.TableLinkForExpression(expr) + }), Logger: util_log.GoKitLogToSlog(log.With(logger, "user", userID)), Registerer: reg, OutageTolerance: cfg.OutageTolerance, diff --git a/pkg/ruler/external_url.go b/pkg/ruler/external_url.go new file mode 100644 index 
00000000000..0928413a889 --- /dev/null +++ b/pkg/ruler/external_url.go @@ -0,0 +1,56 @@ +package ruler + +import ( + "sync" +) + +// userExternalURL tracks per-user resolved external URLs and detects changes. +type userExternalURL struct { + global string + limits RulesLimits + + mtx sync.Mutex + users map[string]string +} + +func newUserExternalURL(global string, limits RulesLimits) *userExternalURL { + return &userExternalURL{ + global: global, + limits: limits, + + mtx: sync.Mutex{}, + users: map[string]string{}, + } +} + +func (e *userExternalURL) update(userID string) (string, bool) { + tenantURL := e.limits.RulerExternalURL(userID) + resolved := e.global + if tenantURL != "" { + resolved = tenantURL + } + + e.mtx.Lock() + defer e.mtx.Unlock() + + if prev, ok := e.users[userID]; ok && prev == resolved { + return resolved, false + } + + e.users[userID] = resolved + return resolved, true +} + +func (e *userExternalURL) remove(user string) { + e.mtx.Lock() + defer e.mtx.Unlock() + delete(e.users, user) +} + +func (e *userExternalURL) cleanup() { + e.mtx.Lock() + defer e.mtx.Unlock() + for user := range e.users { + delete(e.users, user) + } +} diff --git a/pkg/ruler/external_url_test.go b/pkg/ruler/external_url_test.go new file mode 100644 index 00000000000..50b88563e8e --- /dev/null +++ b/pkg/ruler/external_url_test.go @@ -0,0 +1,67 @@ +package ruler + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUserExternalURL(t *testing.T) { + limits := ruleLimits{} + e := newUserExternalURL("http://global:9090", &limits) + + const userID = "test-user" + + t.Run("global URL used when no per-tenant override", func(t *testing.T) { + e.remove(userID) + url, changed := e.update(userID) + require.True(t, changed) + require.Equal(t, "http://global:9090", url) + }) + + t.Run("no change on second update", func(t *testing.T) { + url, changed := e.update(userID) + require.False(t, changed) + require.Equal(t, "http://global:9090", url) + }) + + 
t.Run("per-tenant URL overrides global", func(t *testing.T) { + limits.mtx.Lock() + limits.externalURL = "http://tenant:3000" + limits.mtx.Unlock() + + url, changed := e.update(userID) + require.True(t, changed) + require.Equal(t, "http://tenant:3000", url) + }) + + t.Run("no change when per-tenant URL is the same", func(t *testing.T) { + url, changed := e.update(userID) + require.False(t, changed) + require.Equal(t, "http://tenant:3000", url) + }) + + t.Run("revert to global when per-tenant override removed", func(t *testing.T) { + limits.mtx.Lock() + limits.externalURL = "" + limits.mtx.Unlock() + + url, changed := e.update(userID) + require.True(t, changed) + require.Equal(t, "http://global:9090", url) + }) + + t.Run("remove and cleanup lifecycle", func(t *testing.T) { + e.remove(userID) + // After remove, next update should report changed + url, changed := e.update(userID) + require.True(t, changed) + require.Equal(t, "http://global:9090", url) + + e.cleanup() + // After cleanup, next update should report changed + url, changed = e.update(userID) + require.True(t, changed) + require.Equal(t, "http://global:9090", url) + }) +} diff --git a/pkg/ruler/manager.go b/pkg/ruler/manager.go index d44a0d95829..86611201899 100644 --- a/pkg/ruler/manager.go +++ b/pkg/ruler/manager.go @@ -53,6 +53,9 @@ type DefaultMultiTenantManager struct { // Per-user externalLabels. userExternalLabels *userExternalLabels + // Per-user externalURL. 
+ userExternalURL *userExternalURL + // rules backup rulesBackupManager *rulesBackupManager @@ -101,6 +104,7 @@ func NewDefaultMultiTenantManager(cfg Config, limits RulesLimits, managerFactory ruleEvalMetrics: evalMetrics, notifiers: map[string]*rulerNotifier{}, userExternalLabels: newUserExternalLabels(cfg.ExternalLabels, limits), + userExternalURL: newUserExternalURL(cfg.ExternalURL.String(), limits), notifiersDiscoveryMetrics: notifiersDiscoveryMetrics, mapper: newMapper(cfg.RulePath, logger), userManagers: map[string]RulesManager{}, @@ -166,6 +170,7 @@ func (r *DefaultMultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGrou r.removeNotifier(userID) r.mapper.cleanupUser(userID) r.userExternalLabels.remove(userID) + r.userExternalURL.remove(userID) r.lastReloadSuccessful.DeleteLabelValues(userID) r.lastReloadSuccessfulTimestamp.DeleteLabelValues(userID) r.configUpdatesTotal.DeleteLabelValues(userID) @@ -210,6 +215,7 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user return } externalLabels, externalLabelsUpdated := r.userExternalLabels.update(user) + externalURL, externalURLUpdated := r.userExternalURL.update(user) existing := true manager := r.getRulesManager(user, ctx) @@ -222,13 +228,13 @@ func (r *DefaultMultiTenantManager) syncRulesToManager(ctx context.Context, user return } - if !existing || rulesUpdated || externalLabelsUpdated { + if !existing || rulesUpdated || externalLabelsUpdated || externalURLUpdated { level.Debug(r.logger).Log("msg", "updating rules", "user", user) r.configUpdatesTotal.WithLabelValues(user).Inc() - if (rulesUpdated || externalLabelsUpdated) && existing { + if (rulesUpdated || externalLabelsUpdated || externalURLUpdated) && existing { r.updateRuleCache(user, manager.RuleGroups()) } - err = manager.Update(r.cfg.EvaluationInterval, files, externalLabels, r.cfg.ExternalURL.String(), r.ruleGroupIterationFunc) + err = manager.Update(r.cfg.EvaluationInterval, files, externalLabels, externalURL, 
r.ruleGroupIterationFunc) r.deleteRuleCache(user) if err != nil { r.lastReloadSuccessful.WithLabelValues(user).Set(0) @@ -443,6 +449,7 @@ func (r *DefaultMultiTenantManager) Stop() { // cleanup user rules directories r.mapper.cleanup() r.userExternalLabels.cleanup() + r.userExternalURL.cleanup() } func (m *DefaultMultiTenantManager) ValidateRuleGroup(g rulefmt.RuleGroup) []error { diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 97da8166239..b6db6ec886a 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -2,6 +2,7 @@ package ruler import ( "context" + "encoding/json" "flag" "fmt" "hash/fnv" @@ -26,7 +27,6 @@ import ( "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/promql/parser" promRules "github.com/prometheus/prometheus/rules" - "github.com/prometheus/prometheus/util/strutil" "github.com/weaveworks/common/user" "golang.org/x/sync/errgroup" @@ -506,7 +506,7 @@ type sender interface { // It filters any non-firing alerts from the input. // // Copied from Prometheus's main.go. -func SendAlerts(n sender, externalURL string) promRules.NotifyFunc { +func SendAlerts(n sender, generatorURLFn func(expr string) string) promRules.NotifyFunc { return func(ctx context.Context, expr string, alerts ...*promRules.Alert) { var res []*notifier.Alert @@ -515,7 +515,7 @@ func SendAlerts(n sender, externalURL string) promRules.NotifyFunc { StartsAt: alert.FiredAt, Labels: alert.Labels, Annotations: alert.Annotations, - GeneratorURL: externalURL + strutil.TableLinkForExpression(expr), + GeneratorURL: generatorURLFn(expr), } if !alert.ResolvedAt.IsZero() { a.EndsAt = alert.ResolvedAt @@ -531,6 +531,34 @@ func SendAlerts(n sender, externalURL string) promRules.NotifyFunc { } } +// grafanaExploreLink builds a Grafana Explore URL for the given expression. 
+func grafanaExploreLink(baseURL, expr, datasourceUID string, orgID int64) string { + panes := map[string]any{ + "default": map[string]any{ + "datasource": datasourceUID, + "queries": []map[string]any{ + { + "refId": "A", + "expr": expr, + "datasource": map[string]string{"uid": datasourceUID, "type": "prometheus"}, + "editorMode": "code", + }, + }, + "range": map[string]string{ + "from": "now-1h", + "to": "now", + }, + }, + } + panesJSON, _ := json.Marshal(panes) + + return fmt.Sprintf("%s/explore?schemaVersion=1&panes=%s&orgId=%d", + strings.TrimRight(baseURL, "/"), + url.QueryEscape(string(panesJSON)), + orgID, + ) +} + func ruleGroupDisabled(ruleGroup *rulespb.RuleGroupDesc, disabledRuleGroupsForUser validation.DisabledRuleGroups) bool { for _, disabledRuleGroupForUser := range disabledRuleGroupsForUser { if ruleGroup.Namespace == disabledRuleGroupForUser.Namespace && diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index e5738945cb4..6d7bb861920 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -35,6 +35,7 @@ import ( promRules "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/annotations" + "github.com/prometheus/prometheus/util/strutil" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -89,14 +90,18 @@ func defaultRulerConfig(t testing.TB) Config { } type ruleLimits struct { - mtx sync.RWMutex - tenantShard float64 - maxRulesPerRuleGroup int - maxRuleGroups int - disabledRuleGroups validation.DisabledRuleGroups - maxQueryLength time.Duration - queryOffset time.Duration - externalLabels labels.Labels + mtx sync.RWMutex + tenantShard float64 + maxRulesPerRuleGroup int + maxRuleGroups int + disabledRuleGroups validation.DisabledRuleGroups + maxQueryLength time.Duration + queryOffset time.Duration + externalLabels labels.Labels + externalURL string + alertGeneratorURLFormat string + 
grafanaDatasourceUID string + grafanaOrgID int64 } func (r *ruleLimits) setRulerExternalLabels(lset labels.Labels) { @@ -147,6 +152,30 @@ func (r *ruleLimits) RulerExternalLabels(_ string) labels.Labels { return r.externalLabels } +func (r *ruleLimits) RulerExternalURL(_ string) string { + r.mtx.RLock() + defer r.mtx.RUnlock() + return r.externalURL +} + +func (r *ruleLimits) RulerAlertGeneratorURLFormat(_ string) string { + r.mtx.RLock() + defer r.mtx.RUnlock() + return r.alertGeneratorURLFormat +} + +func (r *ruleLimits) RulerGrafanaDatasourceUID(_ string) string { + r.mtx.RLock() + defer r.mtx.RUnlock() + return r.grafanaDatasourceUID +} + +func (r *ruleLimits) RulerGrafanaOrgID(_ string) int64 { + r.mtx.RLock() + defer r.mtx.RUnlock() + return r.grafanaOrgID +} + func newEmptyQueryable() storage.Queryable { return storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) { return emptyQuerier{}, nil @@ -2684,10 +2713,13 @@ func (s senderFunc) Send(alerts ...*notifier.Alert) { func TestSendAlerts(t *testing.T) { testCases := []struct { - in []*promRules.Alert - exp []*notifier.Alert + name string + in []*promRules.Alert + exp []*notifier.Alert + generatorURLFn func(expr string) string }{ { + name: "prometheus format with valid until", in: []*promRules.Alert{ { Labels: labels.FromStrings("l1", "v1"), @@ -2706,8 +2738,12 @@ func TestSendAlerts(t *testing.T) { GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1", }, }, + generatorURLFn: func(expr string) string { + return "http://localhost:9090" + strutil.TableLinkForExpression(expr) + }, }, { + name: "prometheus format with resolved at", in: []*promRules.Alert{ { Labels: labels.FromStrings("l1", "v1"), @@ -2726,21 +2762,56 @@ func TestSendAlerts(t *testing.T) { GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1", }, }, + generatorURLFn: func(expr string) string { + return "http://localhost:9090" + strutil.TableLinkForExpression(expr) + }, }, { - in: []*promRules.Alert{}, + name: 
"empty alerts", + in: []*promRules.Alert{}, + generatorURLFn: func(expr string) string { + return "http://localhost:9090" + strutil.TableLinkForExpression(expr) + }, + }, + { + name: "grafana explore format", + in: []*promRules.Alert{ + { + Labels: labels.FromStrings("l1", "v1"), + Annotations: labels.FromStrings("a2", "v2"), + ActiveAt: time.Unix(1, 0), + FiredAt: time.Unix(2, 0), + ValidUntil: time.Unix(3, 0), + }, + }, + generatorURLFn: func(expr string) string { + return grafanaExploreLink("http://grafana.example.com", expr, "my-datasource", 1) + }, }, } - for i, tc := range testCases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { - senderFunc := senderFunc(func(alerts ...*notifier.Alert) { + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var received []*notifier.Alert + sf := senderFunc(func(alerts ...*notifier.Alert) { if len(tc.in) == 0 { t.Fatalf("sender called with 0 alert") } - require.Equal(t, tc.exp, alerts) + received = alerts + if tc.exp != nil { + require.Equal(t, tc.exp, alerts) + } }) - SendAlerts(senderFunc, "http://localhost:9090")(context.TODO(), "up", tc.in...) + SendAlerts(sf, tc.generatorURLFn)(context.TODO(), "up", tc.in...) 
+ + // Additional checks for grafana explore format + if tc.name == "grafana explore format" { + require.Len(t, received, 1) + require.Contains(t, received[0].GeneratorURL, "/explore?schemaVersion=1&panes=") + require.Contains(t, received[0].GeneratorURL, "orgId=1") + require.Contains(t, received[0].GeneratorURL, "my-datasource") + require.Contains(t, received[0].GeneratorURL, "up") + } }) } } diff --git a/pkg/util/validation/exporter_test.go b/pkg/util/validation/exporter_test.go index 01f96b92750..5e14e37f9ef 100644 --- a/pkg/util/validation/exporter_test.go +++ b/pkg/util/validation/exporter_test.go @@ -107,6 +107,7 @@ func TestOverridesExporter_withConfig(t *testing.T) { cortex_overrides{limit_name="reject_old_samples",user="tenant-a"} 0 cortex_overrides{limit_name="reject_old_samples_max_age",user="tenant-a"} 1.2096e+06 cortex_overrides{limit_name="ruler_evaluation_delay_duration",user="tenant-a"} 0 + cortex_overrides{limit_name="ruler_grafana_org_id",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_max_rule_groups_per_tenant",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_max_rules_per_rule_group",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_query_offset",user="tenant-a"} 0 diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 2f14b5cab8c..3c8a464671d 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -214,7 +214,11 @@ type Limits struct { RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` - RulesPartialData bool `yaml:"rules_partial_data" json:"rules_partial_data" doc:"nocli|description=Enable to allow rules to be evaluated with data from a single zone, if other zones are not 
available.|default=false"` + RulerExternalURL string `yaml:"ruler_external_url" json:"ruler_external_url" doc:"nocli|description=Per-tenant external URL for the ruler. If set, it overrides the global -ruler.external.url for this tenant's alert notifications."` + RulerAlertGeneratorURLFormat string `yaml:"ruler_alert_generator_url_format" json:"ruler_alert_generator_url_format" doc:"nocli|description=Format for alert generator URLs. Supported values: prometheus (default), grafana-explore."` + RulerGrafanaDatasourceUID string `yaml:"ruler_grafana_datasource_uid" json:"ruler_grafana_datasource_uid" doc:"nocli|description=Grafana datasource UID for alert generator URLs when format is grafana-explore."` + RulerGrafanaOrgID int64 `yaml:"ruler_grafana_org_id" json:"ruler_grafana_org_id" doc:"nocli|description=Grafana organization ID for alert generator URLs when format is grafana-explore.|default=1"` + RulesPartialData bool `yaml:"rules_partial_data" json:"rules_partial_data" doc:"nocli|description=Enable to allow rules to be evaluated with data from a single zone, if other zones are not available.|default=false"` // Store-gateway. 
StoreGatewayTenantShardSize float64 `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"` @@ -1144,6 +1148,22 @@ func (o *Overrides) RulerExternalLabels(userID string) labels.Labels { return o.GetOverridesForUser(userID).RulerExternalLabels } +func (o *Overrides) RulerExternalURL(userID string) string { + return o.GetOverridesForUser(userID).RulerExternalURL +} + +func (o *Overrides) RulerAlertGeneratorURLFormat(userID string) string { + return o.GetOverridesForUser(userID).RulerAlertGeneratorURLFormat +} + +func (o *Overrides) RulerGrafanaDatasourceUID(userID string) string { + return o.GetOverridesForUser(userID).RulerGrafanaDatasourceUID +} + +func (o *Overrides) RulerGrafanaOrgID(userID string) int64 { + return o.GetOverridesForUser(userID).RulerGrafanaOrgID +} + // MaxRegexPatternLength returns the maximum length of an unoptimized regex pattern. // This is only used in Ingester. func (o *Overrides) MaxRegexPatternLength(userID string) int { diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json index 9c98a021863..8c5ec560774 100644 --- a/schemas/cortex-config-schema.json +++ b/schemas/cortex-config-schema.json @@ -5481,6 +5481,10 @@ "x-cli-flag": "validation.reject-old-samples.max-age", "x-format": "duration" }, + "ruler_alert_generator_url_format": { + "description": "Format for alert generator URLs. Supported values: prometheus (default), grafana-explore.", + "type": "string" + }, "ruler_evaluation_delay_duration": { "default": "0s", "description": "Deprecated(use ruler.query-offset instead) and will be removed in v1.19.0: Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.", @@ -5494,6 +5498,19 @@ "description": "external labels for alerting rules", "type": "object" }, + "ruler_external_url": { + "description": "Per-tenant external URL for the ruler. 
If set, it overrides the global -ruler.external.url for this tenant's alert notifications.", + "type": "string" + }, + "ruler_grafana_datasource_uid": { + "description": "Grafana datasource UID for alert generator URLs when format is grafana-explore.", + "type": "string" + }, + "ruler_grafana_org_id": { + "default": 1, + "description": "Grafana organization ID for alert generator URLs when format is grafana-explore.", + "type": "number" + }, "ruler_max_rule_groups_per_tenant": { "default": 0, "description": "Maximum number of rule groups per-tenant. 0 to disable.", From a26eea7d95416bb2a439d5b88a2e94a0ebb72dec Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Fri, 27 Feb 2026 16:15:51 -0800 Subject: [PATCH 06/13] Fix gofmt alignment in ruler and validation packages Co-Authored-By: Claude Opus 4.6 Signed-off-by: Charlie Le --- pkg/ruler/compat.go | 8 ++++---- pkg/ruler/ruler_test.go | 24 ++++++++++++------------ pkg/util/validation/limits.go | 12 ++++++------ 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 2e471ef7f0f..69b4fa0b9eb 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -374,10 +374,10 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi Appendable: NewPusherAppendable(p, userID, overrides, evalMetrics.TotalWritesVec.WithLabelValues(userID), evalMetrics.FailedWritesVec.WithLabelValues(userID)), - Queryable: q, - QueryFunc: queryFunc, - Context: prometheusContext, - ExternalURL: cfg.ExternalURL.URL, + Queryable: q, + QueryFunc: queryFunc, + Context: prometheusContext, + ExternalURL: cfg.ExternalURL.URL, NotifyFunc: SendAlerts(notifier, func(expr string) string { externalURL := cfg.ExternalURL.String() if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index 6d7bb861920..b725008fe1f 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -90,18 +90,18 @@ func 
defaultRulerConfig(t testing.TB) Config { } type ruleLimits struct { - mtx sync.RWMutex - tenantShard float64 - maxRulesPerRuleGroup int - maxRuleGroups int - disabledRuleGroups validation.DisabledRuleGroups - maxQueryLength time.Duration - queryOffset time.Duration - externalLabels labels.Labels - externalURL string - alertGeneratorURLFormat string - grafanaDatasourceUID string - grafanaOrgID int64 + mtx sync.RWMutex + tenantShard float64 + maxRulesPerRuleGroup int + maxRuleGroups int + disabledRuleGroups validation.DisabledRuleGroups + maxQueryLength time.Duration + queryOffset time.Duration + externalLabels labels.Labels + externalURL string + alertGeneratorURLFormat string + grafanaDatasourceUID string + grafanaOrgID int64 } func (r *ruleLimits) setRulerExternalLabels(lset labels.Labels) { diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 3c8a464671d..3aaca4169f4 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -208,12 +208,12 @@ type Limits struct { QueryRejection QueryRejection `yaml:"query_rejection" json:"query_rejection" doc:"nocli|description=Configuration for query rejection."` // Ruler defaults and limits. 
- RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` - RulerTenantShardSize float64 `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` - RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` - RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` - RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` - RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` + RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` + RulerTenantShardSize float64 `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` + RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` + RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` + RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` + RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` RulerExternalURL string `yaml:"ruler_external_url" json:"ruler_external_url" doc:"nocli|description=Per-tenant external URL for the ruler. If set, it overrides the global -ruler.external.url for this tenant's alert notifications."` RulerAlertGeneratorURLFormat string `yaml:"ruler_alert_generator_url_format" json:"ruler_alert_generator_url_format" doc:"nocli|description=Format for alert generator URLs. 
Supported values: prometheus (default), grafana-explore."` RulerGrafanaDatasourceUID string `yaml:"ruler_grafana_datasource_uid" json:"ruler_grafana_datasource_uid" doc:"nocli|description=Grafana datasource UID for alert generator URLs when format is grafana-explore."` From 85c7adc2a341a5361d42b61d4242a4e1c9503433 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 11:51:19 -0700 Subject: [PATCH 07/13] Replace Grafana-specific config with generic Go template for alert generator URLs Replace the 3 Grafana-specific per-tenant config fields (ruler_alert_generator_url_format, ruler_grafana_datasource_uid, ruler_grafana_org_id) with a single generic field: ruler_alert_generator_url_template. This field accepts a Go text/template string with .ExternalURL and .Expression variables, plus built-in functions like urlquery. Users can construct any URL format (Grafana, Perses, etc.) without Cortex needing to understand specific UI formats. The ruler_external_url per-tenant override and SendAlerts signature (func(expr string) string) are kept unchanged. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- docs/configuration/config-file-reference.md | 17 ++- pkg/ruler/compat.go | 21 ++-- pkg/ruler/ruler.go | 47 ++++----- pkg/ruler/ruler_test.go | 111 +++++++++++++------- pkg/util/validation/exporter_test.go | 1 - pkg/util/validation/limits.go | 39 ++++--- schemas/cortex-config-schema.json | 13 +-- 7 files changed, 131 insertions(+), 118 deletions(-) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index ae8260c92b9..7a5a01b5124 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -4339,17 +4339,12 @@ query_rejection: # -ruler.external.url for this tenant's alert notifications. [ruler_external_url: | default = ""] -# Format for alert generator URLs. Supported values: prometheus (default), -# grafana-explore. 
-[ruler_alert_generator_url_format: | default = ""] - -# Grafana datasource UID for alert generator URLs when format is -# grafana-explore. -[ruler_grafana_datasource_uid: | default = ""] - -# Grafana organization ID for alert generator URLs when format is -# grafana-explore. -[ruler_grafana_org_id: | default = 1] +# Go text/template for alert generator URLs. Available variables: .ExternalURL +# (resolved external URL) and .Expression (PromQL expression). Built-in +# functions like urlquery are available. If empty, uses default Prometheus +# /graph format. Example for a custom explore link: +# "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}" +[ruler_alert_generator_url_template: | default = ""] # Enable to allow rules to be evaluated with data from a single zone, if other # zones are not available. diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 69b4fa0b9eb..b34ff795270 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -166,9 +166,7 @@ type RulesLimits interface { DisabledRuleGroups(userID string) validation.DisabledRuleGroups RulerExternalLabels(userID string) labels.Labels RulerExternalURL(userID string) string - RulerAlertGeneratorURLFormat(userID string) string - RulerGrafanaDatasourceUID(userID string) string - RulerGrafanaOrgID(userID string) int64 + RulerAlertGeneratorURLTemplate(userID string) string } type QueryExecutor func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) @@ -383,15 +381,16 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { externalURL = tenantURL } - if overrides.RulerAlertGeneratorURLFormat(userID) == "grafana-explore" { - datasourceUID := overrides.RulerGrafanaDatasourceUID(userID) - orgID := overrides.RulerGrafanaOrgID(userID) - if orgID == 0 { - orgID = 1 - } - return grafanaExploreLink(externalURL, expr, datasourceUID, orgID) + tmplStr := 
overrides.RulerAlertGeneratorURLTemplate(userID) + if tmplStr == "" { + return externalURL + strutil.TableLinkForExpression(expr) } - return externalURL + strutil.TableLinkForExpression(expr) + result, err := executeGeneratorURLTemplate(tmplStr, externalURL, expr) + if err != nil { + level.Warn(logger).Log("msg", "failed to execute generator URL template, falling back to prometheus format", "err", err) + return externalURL + strutil.TableLinkForExpression(expr) + } + return result }), Logger: util_log.GoKitLogToSlog(log.With(logger, "user", userID)), Registerer: reg, diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index b6db6ec886a..c4d2dd0a898 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -1,8 +1,8 @@ package ruler import ( + "bytes" "context" - "encoding/json" "flag" "fmt" "hash/fnv" @@ -13,6 +13,7 @@ import ( "sort" "strings" "sync" + "text/template" "time" "github.com/go-kit/log" @@ -531,32 +532,26 @@ func SendAlerts(n sender, generatorURLFn func(expr string) string) promRules.Not } } -// grafanaExploreLink builds a Grafana Explore URL for the given expression. -func grafanaExploreLink(baseURL, expr, datasourceUID string, orgID int64) string { - panes := map[string]any{ - "default": map[string]any{ - "datasource": datasourceUID, - "queries": []map[string]any{ - { - "refId": "A", - "expr": expr, - "datasource": map[string]string{"uid": datasourceUID, "type": "prometheus"}, - "editorMode": "code", - }, - }, - "range": map[string]string{ - "from": "now-1h", - "to": "now", - }, - }, - } - panesJSON, _ := json.Marshal(panes) +// generatorURLTemplateData holds the variables available in generator URL templates. +type generatorURLTemplateData struct { + ExternalURL string + Expression string +} - return fmt.Sprintf("%s/explore?schemaVersion=1&panes=%s&orgId=%d", - strings.TrimRight(baseURL, "/"), - url.QueryEscape(string(panesJSON)), - orgID, - ) +// executeGeneratorURLTemplate executes a Go text/template to produce a generator URL. 
+func executeGeneratorURLTemplate(tmplStr, externalURL, expr string) (string, error) { + tmpl, err := template.New("generator_url").Parse(tmplStr) + if err != nil { + return "", err + } + var buf bytes.Buffer + if err := tmpl.Execute(&buf, generatorURLTemplateData{ + ExternalURL: externalURL, + Expression: expr, + }); err != nil { + return "", err + } + return buf.String(), nil } func ruleGroupDisabled(ruleGroup *rulespb.RuleGroupDesc, disabledRuleGroupsForUser validation.DisabledRuleGroups) bool { diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index b725008fe1f..a305a8ec39c 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -90,18 +90,16 @@ func defaultRulerConfig(t testing.TB) Config { } type ruleLimits struct { - mtx sync.RWMutex - tenantShard float64 - maxRulesPerRuleGroup int - maxRuleGroups int - disabledRuleGroups validation.DisabledRuleGroups - maxQueryLength time.Duration - queryOffset time.Duration - externalLabels labels.Labels - externalURL string - alertGeneratorURLFormat string - grafanaDatasourceUID string - grafanaOrgID int64 + mtx sync.RWMutex + tenantShard float64 + maxRulesPerRuleGroup int + maxRuleGroups int + disabledRuleGroups validation.DisabledRuleGroups + maxQueryLength time.Duration + queryOffset time.Duration + externalLabels labels.Labels + externalURL string + alertGeneratorURLTemplate string } func (r *ruleLimits) setRulerExternalLabels(lset labels.Labels) { @@ -158,22 +156,10 @@ func (r *ruleLimits) RulerExternalURL(_ string) string { return r.externalURL } -func (r *ruleLimits) RulerAlertGeneratorURLFormat(_ string) string { +func (r *ruleLimits) RulerAlertGeneratorURLTemplate(_ string) string { r.mtx.RLock() defer r.mtx.RUnlock() - return r.alertGeneratorURLFormat -} - -func (r *ruleLimits) RulerGrafanaDatasourceUID(_ string) string { - r.mtx.RLock() - defer r.mtx.RUnlock() - return r.grafanaDatasourceUID -} - -func (r *ruleLimits) RulerGrafanaOrgID(_ string) int64 { - r.mtx.RLock() - defer 
r.mtx.RUnlock() - return r.grafanaOrgID + return r.alertGeneratorURLTemplate } func newEmptyQueryable() storage.Queryable { @@ -2774,7 +2760,7 @@ func TestSendAlerts(t *testing.T) { }, }, { - name: "grafana explore format", + name: "custom template format", in: []*promRules.Alert{ { Labels: labels.FromStrings("l1", "v1"), @@ -2784,33 +2770,84 @@ func TestSendAlerts(t *testing.T) { ValidUntil: time.Unix(3, 0), }, }, + exp: []*notifier.Alert{ + { + Labels: labels.FromStrings("l1", "v1"), + Annotations: labels.FromStrings("a2", "v2"), + StartsAt: time.Unix(2, 0), + EndsAt: time.Unix(3, 0), + GeneratorURL: "http://grafana.example.com/explore?expr=up", + }, + }, generatorURLFn: func(expr string) string { - return grafanaExploreLink("http://grafana.example.com", expr, "my-datasource", 1) + result, _ := executeGeneratorURLTemplate( + "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}", + "http://grafana.example.com", expr) + return result }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - var received []*notifier.Alert sf := senderFunc(func(alerts ...*notifier.Alert) { if len(tc.in) == 0 { t.Fatalf("sender called with 0 alert") } - received = alerts if tc.exp != nil { require.Equal(t, tc.exp, alerts) } }) SendAlerts(sf, tc.generatorURLFn)(context.TODO(), "up", tc.in...) 
+ }) + } +} - // Additional checks for grafana explore format - if tc.name == "grafana explore format" { - require.Len(t, received, 1) - require.Contains(t, received[0].GeneratorURL, "/explore?schemaVersion=1&panes=") - require.Contains(t, received[0].GeneratorURL, "orgId=1") - require.Contains(t, received[0].GeneratorURL, "my-datasource") - require.Contains(t, received[0].GeneratorURL, "up") +func TestExecuteGeneratorURLTemplate(t *testing.T) { + testCases := []struct { + name string + tmplStr string + externalURL string + expr string + expected string + expectErr bool + }{ + { + name: "basic template with expression", + tmplStr: "{{ .ExternalURL }}/graph?expr={{ .Expression }}", + externalURL: "http://prometheus:9090", + expr: "up", + expected: "http://prometheus:9090/graph?expr=up", + }, + { + name: "template with urlquery", + tmplStr: "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}", + externalURL: "http://grafana.example.com", + expr: "rate(http_requests_total[5m])", + expected: "http://grafana.example.com/explore?expr=rate%28http_requests_total%5B5m%5D%29", + }, + { + name: "invalid template returns error", + tmplStr: "{{ .Invalid", + expectErr: true, + }, + { + name: "template with multiple variables", + tmplStr: "{{ .ExternalURL }}/explore?left=%7B%22queries%22:%5B%7B%22expr%22:%22{{ urlquery .Expression }}%22%7D%5D%7D", + externalURL: "http://grafana:3000", + expr: "up", + expected: "http://grafana:3000/explore?left=%7B%22queries%22:%5B%7B%22expr%22:%22up%22%7D%5D%7D", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := executeGeneratorURLTemplate(tc.tmplStr, tc.externalURL, tc.expr) + if tc.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, result) } }) } diff --git a/pkg/util/validation/exporter_test.go b/pkg/util/validation/exporter_test.go index 5e14e37f9ef..01f96b92750 100644 --- a/pkg/util/validation/exporter_test.go +++ 
b/pkg/util/validation/exporter_test.go @@ -107,7 +107,6 @@ func TestOverridesExporter_withConfig(t *testing.T) { cortex_overrides{limit_name="reject_old_samples",user="tenant-a"} 0 cortex_overrides{limit_name="reject_old_samples_max_age",user="tenant-a"} 1.2096e+06 cortex_overrides{limit_name="ruler_evaluation_delay_duration",user="tenant-a"} 0 - cortex_overrides{limit_name="ruler_grafana_org_id",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_max_rule_groups_per_tenant",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_max_rules_per_rule_group",user="tenant-a"} 0 cortex_overrides{limit_name="ruler_query_offset",user="tenant-a"} 0 diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 3aaca4169f4..2a2d661c758 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -10,6 +10,7 @@ import ( "math" "regexp" "strings" + "text/template" "time" "github.com/cespare/xxhash/v2" @@ -208,17 +209,15 @@ type Limits struct { QueryRejection QueryRejection `yaml:"query_rejection" json:"query_rejection" doc:"nocli|description=Configuration for query rejection."` // Ruler defaults and limits. - RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` - RulerTenantShardSize float64 `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` - RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` - RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` - RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` - RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` - RulerExternalURL string `yaml:"ruler_external_url" json:"ruler_external_url" doc:"nocli|description=Per-tenant external URL for the ruler. 
If set, it overrides the global -ruler.external.url for this tenant's alert notifications."` - RulerAlertGeneratorURLFormat string `yaml:"ruler_alert_generator_url_format" json:"ruler_alert_generator_url_format" doc:"nocli|description=Format for alert generator URLs. Supported values: prometheus (default), grafana-explore."` - RulerGrafanaDatasourceUID string `yaml:"ruler_grafana_datasource_uid" json:"ruler_grafana_datasource_uid" doc:"nocli|description=Grafana datasource UID for alert generator URLs when format is grafana-explore."` - RulerGrafanaOrgID int64 `yaml:"ruler_grafana_org_id" json:"ruler_grafana_org_id" doc:"nocli|description=Grafana organization ID for alert generator URLs when format is grafana-explore.|default=1"` - RulesPartialData bool `yaml:"rules_partial_data" json:"rules_partial_data" doc:"nocli|description=Enable to allow rules to be evaluated with data from a single zone, if other zones are not available.|default=false"` + RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` + RulerTenantShardSize float64 `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` + RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` + RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` + RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"` + RulerExternalLabels labels.Labels `yaml:"ruler_external_labels" json:"ruler_external_labels" doc:"nocli|description=external labels for alerting rules"` + RulerExternalURL string `yaml:"ruler_external_url" json:"ruler_external_url" doc:"nocli|description=Per-tenant external URL for the ruler. 
If set, it overrides the global -ruler.external.url for this tenant's alert notifications."` + RulerAlertGeneratorURLTemplate string `yaml:"ruler_alert_generator_url_template" json:"ruler_alert_generator_url_template" doc:"nocli|description=Go text/template for alert generator URLs. Available variables: .ExternalURL (resolved external URL) and .Expression (PromQL expression). Built-in functions like urlquery are available. If empty, uses default Prometheus /graph format."` + RulesPartialData bool `yaml:"rules_partial_data" json:"rules_partial_data" doc:"nocli|description=Enable to allow rules to be evaluated with data from a single zone, if other zones are not available.|default=false"` // Store-gateway. StoreGatewayTenantShardSize float64 `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"` @@ -416,6 +415,12 @@ func (l *Limits) Validate(nameValidationScheme model.ValidationScheme, shardByAl } } + if l.RulerAlertGeneratorURLTemplate != "" { + if _, err := template.New("").Parse(l.RulerAlertGeneratorURLTemplate); err != nil { + return fmt.Errorf("invalid ruler_alert_generator_url_template: %w", err) + } + } + return nil } @@ -1152,16 +1157,8 @@ func (o *Overrides) RulerExternalURL(userID string) string { return o.GetOverridesForUser(userID).RulerExternalURL } -func (o *Overrides) RulerAlertGeneratorURLFormat(userID string) string { - return o.GetOverridesForUser(userID).RulerAlertGeneratorURLFormat -} - -func (o *Overrides) RulerGrafanaDatasourceUID(userID string) string { - return o.GetOverridesForUser(userID).RulerGrafanaDatasourceUID -} - -func (o *Overrides) RulerGrafanaOrgID(userID string) int64 { - return o.GetOverridesForUser(userID).RulerGrafanaOrgID +func (o *Overrides) RulerAlertGeneratorURLTemplate(userID string) string { + return o.GetOverridesForUser(userID).RulerAlertGeneratorURLTemplate } // MaxRegexPatternLength returns the maximum length of an unoptimized regex pattern. 
diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json index 8c5ec560774..dc7567bbcde 100644 --- a/schemas/cortex-config-schema.json +++ b/schemas/cortex-config-schema.json @@ -5481,8 +5481,8 @@ "x-cli-flag": "validation.reject-old-samples.max-age", "x-format": "duration" }, - "ruler_alert_generator_url_format": { - "description": "Format for alert generator URLs. Supported values: prometheus (default), grafana-explore.", + "ruler_alert_generator_url_template": { + "description": "Go text/template for alert generator URLs. Available variables: .ExternalURL (resolved external URL) and .Expression (PromQL expression). Built-in functions like urlquery are available. If empty, uses default Prometheus /graph format.", "type": "string" }, "ruler_evaluation_delay_duration": { @@ -5502,15 +5502,6 @@ "description": "Per-tenant external URL for the ruler. If set, it overrides the global -ruler.external.url for this tenant's alert notifications.", "type": "string" }, - "ruler_grafana_datasource_uid": { - "description": "Grafana datasource UID for alert generator URLs when format is grafana-explore.", - "type": "string" - }, - "ruler_grafana_org_id": { - "default": 1, - "description": "Grafana organization ID for alert generator URLs when format is grafana-explore.", - "type": "number" - }, "ruler_max_rule_groups_per_tenant": { "default": 0, "description": "Maximum number of rule groups per-tenant. 0 to disable.", From b56fde0706abbf0c2031f2bacd69752674f38f39 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Mon, 30 Mar 2026 11:58:44 -0700 Subject: [PATCH 08/13] Add getting-started example for per-tenant alert generator URL templates Add per-tenant Alertmanager datasources (tenant-a, tenant-b) to Grafana provisioning so alerts are visible in Grafana's alerting UI. 
Add runtime-config.yaml with per-tenant overrides: - tenant-a: Grafana Explore URL template with full pane JSON - tenant-b: Perses explore URL template with PrometheusTimeSeriesQuery Update Perses from v0.49 to v0.53.1 and enable the explorer feature (frontend.explorer.enable: true). Rename project from "default" to "cortex" to match template URLs. Add Step 7 to the getting-started guide with instructions for: - Configuring per-tenant alert generator URL templates - Loading alertmanager configs and demo alert rules - Viewing alerts in Grafana at /alerting/groups?groupBy=alertname - Verifying generator URLs via the API Also configure ruler.alertmanager_url and ruler.external_url, and set an explicit UID on the Grafana Cortex datasource for use in templates. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- docs/getting-started/.env | 2 +- docs/getting-started/cortex-config.yaml | 8 ++ docs/getting-started/docker-compose.yaml | 3 + .../grafana-datasource-docker.yaml | 23 ++++ docs/getting-started/perses/config.yaml | 7 +- .../perses/dashboards/cortex-writes.yaml | 2 +- .../perses/datasource-tenant-a.yaml | 14 ++ .../perses/datasource-tenant-b.yaml | 14 ++ docs/getting-started/perses/project.yaml | 4 +- docs/getting-started/runtime-config.yaml | 25 ++++ docs/getting-started/single-binary.md | 128 ++++++++++++++++++ 11 files changed, 225 insertions(+), 5 deletions(-) create mode 100644 docs/getting-started/perses/datasource-tenant-a.yaml create mode 100644 docs/getting-started/perses/datasource-tenant-b.yaml create mode 100644 docs/getting-started/runtime-config.yaml diff --git a/docs/getting-started/.env b/docs/getting-started/.env index 52b62bd990b..81b6cc44d5b 100644 --- a/docs/getting-started/.env +++ b/docs/getting-started/.env @@ -2,4 +2,4 @@ CORTEX_VERSION=v1.20.1 GRAFANA_VERSION=10.4.2 PROMETHEUS_VERSION=v3.2.1 SEAWEEDFS_VERSION=3.67 -PERSES_VERSION=v0.49-distroless-debug +PERSES_VERSION=v0.53.1-distroless-debug diff --git 
a/docs/getting-started/cortex-config.yaml b/docs/getting-started/cortex-config.yaml index 1b24084ad3f..9351b788f18 100644 --- a/docs/getting-started/cortex-config.yaml +++ b/docs/getting-started/cortex-config.yaml @@ -82,6 +82,14 @@ frontend_worker: # https://cortexmetrics.io/docs/configuration/configuration-file/#ruler_config ruler: enable_api: true + external_url: http://localhost:9009 + alertmanager_url: http://localhost:9009/alertmanager + +# Per-tenant runtime configuration (hot-reloaded without restart). +# This file configures per-tenant overrides such as custom alert generator +# URL templates for Grafana, Perses, or any metrics explorer. +runtime_config: + file: /config/runtime-config.yaml # https://cortexmetrics.io/docs/configuration/configuration-file/#ruler_storage_config ruler_storage: diff --git a/docs/getting-started/docker-compose.yaml b/docs/getting-started/docker-compose.yaml index 1c48394b16c..47ac1d7e2e3 100644 --- a/docs/getting-started/docker-compose.yaml +++ b/docs/getting-started/docker-compose.yaml @@ -17,6 +17,7 @@ services: - -config.file=/config/cortex-config.yaml volumes: - ./cortex-config.yaml:/config/cortex-config.yaml:ro + - ./runtime-config.yaml:/config/runtime-config.yaml:ro ports: - "9009:9009" healthcheck: @@ -47,6 +48,8 @@ services: volumes: - ./perses/config.yaml:/etc/perses/config/config.yaml:ro - ./perses/datasource.yaml:/etc/perses/resources/datasource.yaml:ro + - ./perses/datasource-tenant-a.yaml:/etc/perses/resources/datasource-tenant-a.yaml:ro + - ./perses/datasource-tenant-b.yaml:/etc/perses/resources/datasource-tenant-b.yaml:ro - ./perses/project.yaml:/etc/perses/resources/project.yaml:ro - ./perses/dashboards/cortex-writes.yaml:/etc/perses/resources/cortex-writes.yaml:ro prometheus: diff --git a/docs/getting-started/grafana-datasource-docker.yaml b/docs/getting-started/grafana-datasource-docker.yaml index a40cce5e65f..2087d9f237d 100644 --- a/docs/getting-started/grafana-datasource-docker.yaml +++ 
b/docs/getting-started/grafana-datasource-docker.yaml @@ -5,6 +5,7 @@ apiVersion: 1 datasources: - name: Cortex type: prometheus + uid: cortex access: proxy orgId: 1 url: http://cortex:9009/api/prom @@ -71,3 +72,25 @@ datasources: secureJsonData: httpHeaderValue1: cortex version: 1 + - orgId: 1 + name: Tenant A Alertmanager + type: alertmanager + access: proxy + url: http://cortex:9009/ + jsonData: + httpHeaderName1: X-Scope-OrgID + implementation: cortex + secureJsonData: + httpHeaderValue1: tenant-a + version: 1 + - orgId: 1 + name: Tenant B Alertmanager + type: alertmanager + access: proxy + url: http://cortex:9009/ + jsonData: + httpHeaderName1: X-Scope-OrgID + implementation: cortex + secureJsonData: + httpHeaderValue1: tenant-b + version: 1 diff --git a/docs/getting-started/perses/config.yaml b/docs/getting-started/perses/config.yaml index b87f81bc0f6..ba04acce34e 100644 --- a/docs/getting-started/perses/config.yaml +++ b/docs/getting-started/perses/config.yaml @@ -8,7 +8,7 @@ security: database: file: extension: yaml - folder: /perses + folder: /tmp/perses-data schemas: datasources_path: /etc/perses/cue/schemas/datasources @@ -16,6 +16,11 @@ schemas: panels_path: /etc/perses/cue/schemas/panels queries_path: /etc/perses/cue/schemas/queries variables_path: /etc/perses/cue/schemas/variables + +frontend: + explorer: + enable: true + provisioning: folders: - /etc/perses/resources \ No newline at end of file diff --git a/docs/getting-started/perses/dashboards/cortex-writes.yaml b/docs/getting-started/perses/dashboards/cortex-writes.yaml index 8705ad5f556..a7de3b2795b 100644 --- a/docs/getting-started/perses/dashboards/cortex-writes.yaml +++ b/docs/getting-started/perses/dashboards/cortex-writes.yaml @@ -4,7 +4,7 @@ metadata: createdAt: 2025-03-24T19:15:47.468680767Z updatedAt: 2025-03-24T19:43:53.000136362Z version: 12 - project: default + project: cortex spec: display: name: Cortex / Writes diff --git a/docs/getting-started/perses/datasource-tenant-a.yaml 
b/docs/getting-started/perses/datasource-tenant-a.yaml new file mode 100644 index 00000000000..78d67370828 --- /dev/null +++ b/docs/getting-started/perses/datasource-tenant-a.yaml @@ -0,0 +1,14 @@ +kind: GlobalDatasource +metadata: + name: TenantA +spec: + default: false + plugin: + kind: PrometheusDatasource + spec: + proxy: + kind: HTTPProxy + spec: + url: http://cortex:9009/api/prom + headers: + X-Scope-OrgID: tenant-a diff --git a/docs/getting-started/perses/datasource-tenant-b.yaml b/docs/getting-started/perses/datasource-tenant-b.yaml new file mode 100644 index 00000000000..40f80a67492 --- /dev/null +++ b/docs/getting-started/perses/datasource-tenant-b.yaml @@ -0,0 +1,14 @@ +kind: GlobalDatasource +metadata: + name: TenantB +spec: + default: false + plugin: + kind: PrometheusDatasource + spec: + proxy: + kind: HTTPProxy + spec: + url: http://cortex:9009/api/prom + headers: + X-Scope-OrgID: tenant-b diff --git a/docs/getting-started/perses/project.yaml b/docs/getting-started/perses/project.yaml index a39681c7841..3b1a1ad9835 100644 --- a/docs/getting-started/perses/project.yaml +++ b/docs/getting-started/perses/project.yaml @@ -1,6 +1,6 @@ kind: Project metadata: - name: default + name: cortex spec: display: - name: "default" \ No newline at end of file + name: "Cortex" \ No newline at end of file diff --git a/docs/getting-started/runtime-config.yaml b/docs/getting-started/runtime-config.yaml new file mode 100644 index 00000000000..487cc9864b1 --- /dev/null +++ b/docs/getting-started/runtime-config.yaml @@ -0,0 +1,25 @@ +# Runtime configuration with per-tenant overrides. +# This file is hot-reloaded by Cortex without requiring a restart. +# +# The examples below demonstrate per-tenant alert generator URL templates. +# Each tenant can have a different URL format for alert "Source" links. + +overrides: + # Tenant using Grafana Explore for alert generator URLs. 
+ # Clicking "Source" on an alert in Alertmanager opens Grafana Explore + # with the PromQL expression pre-filled. + tenant-a: + ruler_external_url: "http://localhost:3000" + ruler_alert_generator_url_template: >- + {{ .ExternalURL }}/explore?schemaVersion=1&panes=%7B%22default%22:%7B%22datasource%22:%22cortex%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22{{ urlquery .Expression }}%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D%7D&orgId=1 + + # Tenant using Perses for alert generator URLs. + # Clicking "Source" on an alert opens Perses explore view with + # the PromQL expression pre-filled and the TenantB datasource selected. + tenant-b: + ruler_external_url: http://localhost:8080 + ruler_alert_generator_url_template: >- + {{ .ExternalURL }}/explore?explorer=Prometheus-PrometheusExplorer&data=%7B%22tab%22%3A%22graph%22%2C%22queries%22%3A%5B%7B%22kind%22%3A%22TimeSeriesQuery%22%2C%22spec%22%3A%7B%22plugin%22%3A%7B%22kind%22%3A%22PrometheusTimeSeriesQuery%22%2C%22spec%22%3A%7B%22datasource%22%3A%7B%22kind%22%3A%22PrometheusDatasource%22%2C%22name%22%3A%22tenantb%22%7D%2C%22query%22%3A%22{{ urlquery .Expression }}%22%7D%7D%7D%7D%5D%7D + + # Tenants without overrides use the global ruler.external.url + # and the default Prometheus /graph format. diff --git a/docs/getting-started/single-binary.md b/docs/getting-started/single-binary.md index 6321a1c238e..4b7c93ceb14 100644 --- a/docs/getting-started/single-binary.md +++ b/docs/getting-started/single-binary.md @@ -214,6 +214,133 @@ docker run --network cortex-docs-getting-started_default \ Configure Alertmanager notification policies in Grafana: [Alerting → Notification policies](http://localhost:3000/alerting/notifications?search=&alertmanager=Cortex%20Alertmanager) +## Step 7: Per-Tenant Alert Generator URLs (Optional) + +Cortex supports customizing the "Source" link on alerts per-tenant using Go `text/template` strings. 
This lets each tenant's alerts link back to their preferred metrics explorer — Grafana Explore, Perses, or any other tool. + +The getting-started example includes a `runtime-config.yaml` with two tenant configurations: +- **tenant-a**: Alert source links point to **Grafana Explore** +- **tenant-b**: Alert source links point to **Perses** + +### How It Works + +The `ruler_alert_generator_url_template` field accepts a Go template with two variables: +- `{{ .ExternalURL }}` — the resolved external URL for this tenant (set via `ruler_external_url`) +- `{{ .Expression }}` — the PromQL expression that triggered the alert + +Built-in Go template functions like `urlquery` are available for URL encoding. + +Example for Grafana Explore: +```yaml +ruler_external_url: "http://localhost:3000" +ruler_alert_generator_url_template: >- + {{ .ExternalURL }}/explore?expr={{ urlquery .Expression }} +``` + +### Try It Out + +1. **Load alertmanager configs** for tenant-a and tenant-b: + +```sh +# Upload alertmanager config for tenant-a +curl -X POST http://localhost:9009/api/v1/alerts \ + -H "X-Scope-OrgID: tenant-a" \ + -H "Content-Type: application/yaml" \ + --data-binary @- <<'EOF' +alertmanager_config: | + receivers: + - name: default-receiver + route: + receiver: default-receiver + group_wait: 5s + group_interval: 10s +EOF + +# Upload alertmanager config for tenant-b +curl -X POST http://localhost:9009/api/v1/alerts \ + -H "X-Scope-OrgID: tenant-b" \ + -H "Content-Type: application/yaml" \ + --data-binary @- <<'EOF' +alertmanager_config: | + receivers: + - name: default-receiver + route: + receiver: default-receiver + group_wait: 5s + group_interval: 10s +EOF +``` + +2. 
**Load demo alert rules** that fire immediately: + +```sh +# Alert rules for tenant-a +curl -X POST http://localhost:9009/api/v1/rules/demo \ + -H "X-Scope-OrgID: tenant-a" \ + -H "Content-Type: application/yaml" \ + --data-binary @- <<'EOF' +name: demo_alerts +interval: 10s +rules: + - alert: HighMemoryUsage + expr: vector(85) > 80 + for: 0m + labels: + severity: warning + annotations: + summary: "Memory usage is above 80%" + - alert: HighErrorRate + expr: vector(5.2) > 5 + for: 0m + labels: + severity: critical + annotations: + summary: "Error rate exceeds 5%" +EOF + +# Alert rules for tenant-b +curl -X POST http://localhost:9009/api/v1/rules/demo \ + -H "X-Scope-OrgID: tenant-b" \ + -H "Content-Type: application/yaml" \ + --data-binary @- <<'EOF' +name: demo_alerts +interval: 10s +rules: + - alert: DiskSpaceLow + expr: vector(92) > 90 + for: 0m + labels: + severity: critical + annotations: + summary: "Disk space usage above 90%" + - alert: HighLatency + expr: vector(3.5) > 2 + for: 0m + labels: + severity: warning + annotations: + summary: "P99 latency exceeds 2s" +EOF +``` + +3. **Wait ~30 seconds** for the ruler to evaluate rules and send alerts to the alertmanager. + +4. **View alerts in Grafana** at [Alerting → Alert groups](http://localhost:3000/alerting/groups?groupBy=alertname): + - Select the **Tenant A Alertmanager** datasource — click "See source" to open Grafana Explore + - Select the **Tenant B Alertmanager** datasource — click "See source" to open Perses + +5. 
**Verify generator URLs** via the API: + +```sh +# Tenant A: Grafana Explore URLs +curl -s "http://localhost:9009/alertmanager/api/v2/alerts" \ + -H "X-Scope-OrgID: tenant-a" | jq '.[].generatorURL' + +# Tenant B: Perses URLs +curl -s "http://localhost:9009/alertmanager/api/v2/alerts" \ + -H "X-Scope-OrgID: tenant-b" | jq '.[].generatorURL' +``` + ## Explore and Experiment Now that everything is running, try these experiments to learn how Cortex works: @@ -306,6 +433,7 @@ This setup uses several configuration files. Here's what each does: |----------------------------------|---------------------------------------------------------------| | `docker-compose.yaml` | Defines all services (Cortex, Prometheus, Grafana, SeaweedFS) | | `cortex-config.yaml` | Cortex configuration (storage, limits, components) | +| `runtime-config.yaml` | Per-tenant runtime overrides (alert generator URL templates) | | `prometheus-config.yaml` | Prometheus configuration with remote_write to Cortex | | `grafana-datasource-docker.yaml` | Grafana datasource pointing to Cortex | | `rules.yaml` | Example recording rules | From 1b79ff0aeaecef3ea2fe748ad2b7c179659d7653 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Tue, 21 Apr 2026 16:20:50 -0700 Subject: [PATCH 09/13] fix linting Signed-off-by: Charlie Le --- docs/configuration/config-file-reference.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 8e044f8173b..8c2adf98027 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -4395,8 +4395,7 @@ query_rejection: # Go text/template for alert generator URLs. Available variables: .ExternalURL # (resolved external URL) and .Expression (PromQL expression). Built-in # functions like urlquery are available. If empty, uses default Prometheus -# /graph format. 
Example for a custom explore link: -# "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}" +# /graph format. [ruler_alert_generator_url_template: | default = ""] # Enable to allow rules to be evaluated with data from a single zone, if other From d5f5b98dae0ec7283364a1fe395c55c41ed63696 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Thu, 23 Apr 2026 14:30:27 -0700 Subject: [PATCH 10/13] Fix per-tenant ExternalURL, datasource UID, and add CHANGELOG - Set ExternalURL on rules.ManagerOptions to the per-tenant override so {{ $externalURL }} in alert annotation/label templates reflects the tenant's ruler_external_url, not just the global config. - Fix Grafana datasource: add explicit uid: tenant-a so the template reference "datasource":"tenant-a" resolves correctly. - Fix runtime-config.yaml template to reference "datasource":"tenant-a" instead of "datasource":"cortex". - Add text/template security comment explaining the intentional choice over html/template. - Add CHANGELOG entry for the ruler_alert_generator_url_template feature. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- CHANGELOG.md | 1 + .../grafana-datasource-docker.yaml | 1 + docs/getting-started/runtime-config.yaml | 2 +- pkg/ruler/compat.go | 27 +++++++++++++------ pkg/ruler/ruler.go | 4 +++ 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66d2be50ef2..bfd7504aed2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## master / unreleased +* [FEATURE] Ruler: Add per-tenant `ruler_alert_generator_url_template` runtime config option to customize alert generator URLs using Go templates. Supports Grafana Explore, Perses, and other UIs. #7302 * [FEATURE] Distributor: Add experimental `-distributor.enable-start-timestamp` flag for Prometheus Remote Write 2.0. When enabled, `StartTimestamp (ST)` is ingested. 
#7371 * [FEATURE] Memberlist: Add `-memberlist.cluster-label` and `-memberlist.cluster-label-verification-disabled` to prevent accidental cross-cluster gossip joins and support rolling label rollout. #7385 * [FEATURE] Querier: Add timeout classification to classify query timeouts as 4XX (user error) or 5XX (system error) based on phase timing. When enabled, queries that spend most of their time in PromQL evaluation return `422 Unprocessable Entity` instead of `503 Service Unavailable`. #7374 diff --git a/docs/getting-started/grafana-datasource-docker.yaml b/docs/getting-started/grafana-datasource-docker.yaml index 2087d9f237d..41385b9d2f0 100644 --- a/docs/getting-started/grafana-datasource-docker.yaml +++ b/docs/getting-started/grafana-datasource-docker.yaml @@ -23,6 +23,7 @@ datasources: isDefault: true - name: Tenant A type: prometheus + uid: tenant-a access: proxy orgId: 1 url: http://cortex:9009/api/prom diff --git a/docs/getting-started/runtime-config.yaml b/docs/getting-started/runtime-config.yaml index 487cc9864b1..5fa09833fec 100644 --- a/docs/getting-started/runtime-config.yaml +++ b/docs/getting-started/runtime-config.yaml @@ -11,7 +11,7 @@ overrides: tenant-a: ruler_external_url: "http://localhost:3000" ruler_alert_generator_url_template: >- - {{ .ExternalURL }}/explore?schemaVersion=1&panes=%7B%22default%22:%7B%22datasource%22:%22cortex%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22{{ urlquery .Expression }}%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D%7D&orgId=1 + {{ .ExternalURL }}/explore?schemaVersion=1&panes=%7B%22default%22:%7B%22datasource%22:%22tenant-a%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22{{ urlquery .Expression }}%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D%7D&orgId=1 # Tenant using Perses for alert generator URLs. 
# Clicking "Source" on an alert opens Perses explore view with diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 2b46ae2b3df..ec5ea438360 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "net/url" "time" @@ -377,6 +378,20 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi // for graceful shutdown of rules that are still in execution even in case the cortex context is canceled. prometheusContext := user.InjectOrgID(context.WithoutCancel(ctx), userID) + // Resolve the per-tenant external URL, falling back to the global config. + // This is used both for alert annotation/label template expansion ({{ $externalURL }}) + // and for generating the alert generator URL in NotifyFunc. + externalURL := cfg.ExternalURL.URL + externalURLStr := cfg.ExternalURL.String() + if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { + externalURLStr = tenantURL + if parsed, err := url.Parse(tenantURL); err == nil { + externalURL = parsed + } else { + level.Warn(logger).Log("msg", "failed to parse per-tenant ruler external URL, using global", "user", userID, "url", tenantURL, "err", err) + } + } + return rules.NewManager(&rules.ManagerOptions{ Appendable: NewPusherAppendable(p, userID, overrides, evalMetrics.TotalWritesVec.WithLabelValues(userID), @@ -384,20 +399,16 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi Queryable: q, QueryFunc: queryFunc, Context: prometheusContext, - ExternalURL: cfg.ExternalURL.URL, + ExternalURL: externalURL, NotifyFunc: SendAlerts(notifier, func(expr string) string { - externalURL := cfg.ExternalURL.String() - if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { - externalURL = tenantURL - } tmplStr := overrides.RulerAlertGeneratorURLTemplate(userID) if tmplStr == "" { - return externalURL + strutil.TableLinkForExpression(expr) + return externalURLStr + 
strutil.TableLinkForExpression(expr) } - result, err := executeGeneratorURLTemplate(tmplStr, externalURL, expr) + result, err := executeGeneratorURLTemplate(tmplStr, externalURLStr, expr) if err != nil { level.Warn(logger).Log("msg", "failed to execute generator URL template, falling back to prometheus format", "err", err) - return externalURL + strutil.TableLinkForExpression(expr) + return externalURLStr + strutil.TableLinkForExpression(expr) } return result }), diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 197c96eca98..9c05c23aad1 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -540,6 +540,10 @@ type generatorURLTemplateData struct { } // executeGeneratorURLTemplate executes a Go text/template to produce a generator URL. +// We intentionally use text/template instead of html/template because the output is a URL, +// not HTML. HTML-escaping would corrupt URL characters (e.g., & → &amp;). The template is +// configured per-tenant by the operator via runtime config, so the risk is limited to +// self-harm (a tenant operator misconfiguring their own alert links). func executeGeneratorURLTemplate(tmplStr, externalURL, expr string) (string, error) { tmpl, err := template.New("generator_url").Parse(tmplStr) if err != nil { From faf65eac50e51461593867169830137d8b251f21 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:12:06 -0700 Subject: [PATCH 11/13] Add URL validation to generator URL template output Validate that the output of executeGeneratorURLTemplate produces a safe URL by checking: - Scheme must be http or https (blocks javascript: and data: URIs) - Host must be present - Fragment must not contain HTML characters < or > (blocks script injection) Add test cases covering javascript URI, data URI, fragment injection, missing host, and valid fragment scenarios.
Signed-off-by: Friedrich Gonzalez Signed-off-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com> --- pkg/ruler/ruler.go | 30 ++++++++++++++++++++++++++---- pkg/ruler/ruler_test.go | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 9c05c23aad1..d6916e372ae 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -541,9 +541,8 @@ type generatorURLTemplateData struct { // executeGeneratorURLTemplate executes a Go text/template to produce a generator URL. // We intentionally use text/template instead of html/template because the output is a URL, -// not HTML. HTML-escaping would corrupt URL characters (e.g., & → &amp;). The template is -// configured per-tenant by the operator via runtime config, so the risk is limited to -// self-harm (a tenant operator misconfiguring their own alert links). +// not HTML. HTML-escaping would corrupt URL characters (e.g., & → &amp;). The output is +// validated to ensure it uses http/https scheme to prevent javascript: or data: injection. func executeGeneratorURLTemplate(tmplStr, externalURL, expr string) (string, error) { tmpl, err := template.New("generator_url").Parse(tmplStr) if err != nil { @@ -556,7 +555,30 @@ func executeGeneratorURLTemplate(tmplStr, externalURL, expr string) (string, err }); err != nil { return "", err } - return buf.String(), nil + result := buf.String() + if err := validateGeneratorURL(result); err != nil { + return "", err + } + return result, nil +} + +// validateGeneratorURL checks that the URL is well-formed, uses http or https scheme, +// and does not contain HTML in the fragment.
+func validateGeneratorURL(rawURL string) error { + u, err := url.Parse(rawURL) + if err != nil { + return fmt.Errorf("invalid generator URL: %w", err) + } + if u.Scheme != "http" && u.Scheme != "https" { + return fmt.Errorf("generator URL has unsupported scheme %q, must be http or https", u.Scheme) + } + if u.Host == "" { + return fmt.Errorf("generator URL is missing host") + } + if strings.ContainsAny(u.Fragment, "<>") { + return fmt.Errorf("generator URL fragment contains invalid characters") + } + return nil } func ruleGroupDisabled(ruleGroup *rulespb.RuleGroupDesc, disabledRuleGroupsForUser validation.DisabledRuleGroups) bool { diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index a305a8ec39c..5562f7ee401 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -2838,6 +2838,41 @@ func TestExecuteGeneratorURLTemplate(t *testing.T) { expr: "up", expected: "http://grafana:3000/explore?left=%7B%22queries%22:%5B%7B%22expr%22:%22up%22%7D%5D%7D", }, + { + name: "javascript URI scheme is rejected", + tmplStr: "javascript://alert('xss')", + externalURL: "http://localhost:3000", + expr: "up", + expectErr: true, + }, + { + name: "data URI scheme is rejected", + tmplStr: "data:text/html,<script>alert('xss')</script>", + externalURL: "http://localhost:3000", + expr: "up", + expectErr: true, + }, + { + name: "fragment with script tag is rejected", + tmplStr: "{{ .ExternalURL }}/explore#<script>alert('xss')</script>", + externalURL: "http://localhost:3000", + expr: "up", + expectErr: true, + }, + { + name: "missing host is rejected", + tmplStr: "http:///path", + externalURL: "http://localhost:3000", + expr: "up", + expectErr: true, + }, + { + name: "valid URL with fragment is allowed", + tmplStr: "{{ .ExternalURL }}/explore#tab=graph", + externalURL: "http://localhost:3000", + expr: "up", + expected: "http://localhost:3000/explore#tab=graph", + }, } for _, tc := range testCases { From db6b0a57b79a2304631f3e45ba75151996e4ee0d Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Fri, 24 Apr 2026 10:29:36 -0700
Subject: [PATCH 12/13] Re-resolve per-tenant external URL on every alert send The NotifyFunc closure was capturing externalURLStr once at manager creation time, so runtime config changes to ruler_external_url would not take effect until the ruler was restarted. Move the resolution into a helper that re-reads from overrides on each call. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- pkg/ruler/compat.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index ec5ea438360..4a8e888c9aa 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -378,13 +378,12 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi // for graceful shutdown of rules that are still in execution even in case the cortex context is canceled. prometheusContext := user.InjectOrgID(context.WithoutCancel(ctx), userID) - // Resolve the per-tenant external URL, falling back to the global config. - // This is used both for alert annotation/label template expansion ({{ $externalURL }}) - // and for generating the alert generator URL in NotifyFunc. + // Resolve the per-tenant external URL for ManagerOptions.ExternalURL. + // This *url.URL is set once at manager creation and cannot be refreshed + // without recreating the manager. It powers the {{ externalURL }} and + // {{ pathPrefix }} template functions (not {{ $externalURL }}). 
externalURL := cfg.ExternalURL.URL - externalURLStr := cfg.ExternalURL.String() if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { - externalURLStr = tenantURL if parsed, err := url.Parse(tenantURL); err == nil { externalURL = parsed } else { @@ -392,6 +391,17 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi } } + // resolveExternalURL returns the per-tenant external URL string, + // re-reading from runtime config on each call so that changes + // take effect without restarting the ruler. + globalExternalURLStr := cfg.ExternalURL.String() + resolveExternalURL := func() string { + if tenantURL := overrides.RulerExternalURL(userID); tenantURL != "" { + return tenantURL + } + return globalExternalURLStr + } + return rules.NewManager(&rules.ManagerOptions{ Appendable: NewPusherAppendable(p, userID, overrides, evalMetrics.TotalWritesVec.WithLabelValues(userID), @@ -401,6 +411,7 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi Context: prometheusContext, ExternalURL: externalURL, NotifyFunc: SendAlerts(notifier, func(expr string) string { + externalURLStr := resolveExternalURL() tmplStr := overrides.RulerAlertGeneratorURLTemplate(userID) if tmplStr == "" { return externalURLStr + strutil.TableLinkForExpression(expr) From a8efdeeadab215863bbca12b6bc4945eea7bac49 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Fri, 24 Apr 2026 10:33:21 -0700 Subject: [PATCH 13/13] Cache parsed generator URL template to avoid re-parsing on every alert The generator URL template is parsed from the runtime config string on every alert send. Cache the parsed template and only re-parse when the template string changes. 
Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Charlie Le --- pkg/ruler/compat.go | 7 ++++++- pkg/ruler/ruler.go | 26 ++++++++++++++++++++++++-- pkg/ruler/ruler_test.go | 28 ++++++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 4a8e888c9aa..ff9a5995b2b 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -402,6 +402,11 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi return globalExternalURLStr } + // Cache for the parsed generator URL template. The closure below is called + // on every alert send; caching avoids re-parsing the template each time. + // The cache is invalidated if the template string changes via runtime config. + tmplCache := &generatorURLTemplateCache{} + return rules.NewManager(&rules.ManagerOptions{ Appendable: NewPusherAppendable(p, userID, overrides, evalMetrics.TotalWritesVec.WithLabelValues(userID), @@ -416,7 +421,7 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi if tmplStr == "" { return externalURLStr + strutil.TableLinkForExpression(expr) } - result, err := executeGeneratorURLTemplate(tmplStr, externalURLStr, expr) + result, err := executeGeneratorURLTemplate(tmplCache, tmplStr, externalURLStr, expr) if err != nil { level.Warn(logger).Log("msg", "failed to execute generator URL template, falling back to prometheus format", "err", err) return externalURLStr + strutil.TableLinkForExpression(expr) diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index d6916e372ae..ee8dd00ede3 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -539,12 +539,34 @@ type generatorURLTemplateData struct { Expression string } +// generatorURLTemplateCache caches a parsed text/template keyed on the template string. +// If the template string changes (e.g., via runtime config), the cache is invalidated. 
+type generatorURLTemplateCache struct { + tmplStr string + tmpl *template.Template +} + +// getOrParse returns a parsed template, reusing the cached one if the template string +// hasn't changed. This avoids re-parsing on every alert send. +func (c *generatorURLTemplateCache) getOrParse(tmplStr string) (*template.Template, error) { + if c.tmpl != nil && c.tmplStr == tmplStr { + return c.tmpl, nil + } + tmpl, err := template.New("generator_url").Parse(tmplStr) + if err != nil { + return nil, err + } + c.tmplStr = tmplStr + c.tmpl = tmpl + return tmpl, nil +} + // executeGeneratorURLTemplate executes a Go text/template to produce a generator URL. // We intentionally use text/template instead of html/template because the output is a URL, // not HTML. HTML-escaping would corrupt URL characters (e.g., & → &amp;). The output is // validated to ensure it uses http/https scheme to prevent javascript: or data: injection. -func executeGeneratorURLTemplate(tmplStr, externalURL, expr string) (string, error) { - tmpl, err := template.New("generator_url").Parse(tmplStr) +func executeGeneratorURLTemplate(cache *generatorURLTemplateCache, tmplStr, externalURL, expr string) (string, error) { + tmpl, err := cache.getOrParse(tmplStr) if err != nil { return "", err } diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index 5562f7ee401..51635a65523 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -2780,7 +2780,8 @@ func TestSendAlerts(t *testing.T) { }, }, generatorURLFn: func(expr string) string { - result, _ := executeGeneratorURLTemplate( + cache := &generatorURLTemplateCache{} + result, _ := executeGeneratorURLTemplate(cache, "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}", "http://grafana.example.com", expr) return result @@ -2877,7 +2878,8 @@ func TestExecuteGeneratorURLTemplate(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - result, err := executeGeneratorURLTemplate(tc.tmplStr, tc.externalURL,
tc.expr) + cache := &generatorURLTemplateCache{} + result, err := executeGeneratorURLTemplate(cache, tc.tmplStr, tc.externalURL, tc.expr) if tc.expectErr { require.Error(t, err) } else { @@ -2888,6 +2890,28 @@ func TestExecuteGeneratorURLTemplate(t *testing.T) { } } +func TestGeneratorURLTemplateCaching(t *testing.T) { + cache := &generatorURLTemplateCache{} + + // First call parses and caches the template. + result1, err := executeGeneratorURLTemplate(cache, "{{ .ExternalURL }}/graph?expr={{ urlquery .Expression }}", "http://localhost:9090", "up") + require.NoError(t, err) + require.Equal(t, "http://localhost:9090/graph?expr=up", result1) + cachedTmpl := cache.tmpl + + // Same template string reuses the cached parsed template. + result2, err := executeGeneratorURLTemplate(cache, "{{ .ExternalURL }}/graph?expr={{ urlquery .Expression }}", "http://localhost:9090", "rate(http_requests_total[5m])") + require.NoError(t, err) + require.Equal(t, "http://localhost:9090/graph?expr=rate%28http_requests_total%5B5m%5D%29", result2) + require.Same(t, cachedTmpl, cache.tmpl, "expected cached template to be reused") + + // Different template string invalidates the cache. + result3, err := executeGeneratorURLTemplate(cache, "{{ .ExternalURL }}/explore?expr={{ urlquery .Expression }}", "http://grafana:3000", "up") + require.NoError(t, err) + require.Equal(t, "http://grafana:3000/explore?expr=up", result3) + require.NotSame(t, cachedTmpl, cache.tmpl, "expected cache to be invalidated for new template string") +} + // Tests for whether the Ruler is able to recover ALERTS_FOR_STATE state func TestRecoverAlertsPostOutage(t *testing.T) { // Test Setup