diff --git a/apps/webapp/app/components/logs/LogsVersionFilter.tsx b/apps/webapp/app/components/logs/LogsVersionFilter.tsx new file mode 100644 index 00000000000..4cc10545060 --- /dev/null +++ b/apps/webapp/app/components/logs/LogsVersionFilter.tsx @@ -0,0 +1,58 @@ +import * as Ariakit from "@ariakit/react"; +import { SelectTrigger } from "~/components/primitives/Select"; +import { useSearchParams } from "~/hooks/useSearchParam"; +import { appliedSummary, FilterMenuProvider } from "~/components/runs/v3/SharedFilters"; +import { filterIcon, VersionsDropdown } from "~/components/runs/v3/RunFilters"; +import { AppliedFilter } from "~/components/primitives/AppliedFilter"; + +const shortcut = { key: "v" }; + +export function LogsVersionFilter() { + const { values, del } = useSearchParams(); + const selectedVersions = values("versions"); + + if (selectedVersions.length === 0 || selectedVersions.every((v) => v === "")) { + return ( + + {(search, setSearch) => ( + + Versions + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); + } + + return ( + + {(search, setSearch) => ( + }> + del(["versions", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} diff --git a/apps/webapp/app/components/primitives/charts/ChartRoot.tsx b/apps/webapp/app/components/primitives/charts/ChartRoot.tsx index 9a366c9789d..3b2a2c6a3c1 100644 --- a/apps/webapp/app/components/primitives/charts/ChartRoot.tsx +++ b/apps/webapp/app/components/primitives/charts/ChartRoot.tsx @@ -40,6 +40,8 @@ export type ChartRootProps = { onViewAllLegendItems?: () => void; /** When true, constrains legend to max 50% height with scrolling */ legendScrollable?: boolean; + /** Additional className for the legend */ + legendClassName?: string; /** When true, chart fills its parent container height and distributes space between chart and legend */ fillContainer?: boolean; /** Content rendered 
between the chart and the legend */ @@ -87,6 +89,7 @@ export function ChartRoot({ legendValueFormatter, onViewAllLegendItems, legendScrollable = false, + legendClassName, fillContainer = false, beforeLegend, children, @@ -114,6 +117,7 @@ export function ChartRoot({ legendValueFormatter={legendValueFormatter} onViewAllLegendItems={onViewAllLegendItems} legendScrollable={legendScrollable} + legendClassName={legendClassName} fillContainer={fillContainer} beforeLegend={beforeLegend} > @@ -133,6 +137,7 @@ type ChartRootInnerProps = { legendValueFormatter?: (value: number) => string; onViewAllLegendItems?: () => void; legendScrollable?: boolean; + legendClassName?: string; fillContainer?: boolean; beforeLegend?: React.ReactNode; children: React.ComponentProps["children"]; @@ -148,6 +153,7 @@ function ChartRootInner({ legendValueFormatter, onViewAllLegendItems, legendScrollable = false, + legendClassName, fillContainer = false, beforeLegend, children, @@ -193,6 +199,7 @@ function ChartRootInner({ valueFormatter={legendValueFormatter} onViewAllLegendItems={onViewAllLegendItems} scrollable={legendScrollable} + className={legendClassName} /> )} diff --git a/apps/webapp/app/components/runs/v3/RunFilters.tsx b/apps/webapp/app/components/runs/v3/RunFilters.tsx index f643209b8cb..dc3657b42a9 100644 --- a/apps/webapp/app/components/runs/v3/RunFilters.tsx +++ b/apps/webapp/app/components/runs/v3/RunFilters.tsx @@ -1216,7 +1216,7 @@ function AppliedMachinesFilter() { ); } -function VersionsDropdown({ +export function VersionsDropdown({ trigger, clearSearchValue, searchValue, diff --git a/apps/webapp/app/models/projectAlert.server.ts b/apps/webapp/app/models/projectAlert.server.ts index d2ab0be1d1a..dbcb672ad7d 100644 --- a/apps/webapp/app/models/projectAlert.server.ts +++ b/apps/webapp/app/models/projectAlert.server.ts @@ -32,3 +32,9 @@ export const ProjectAlertSlackStorage = z.object({ }); export type ProjectAlertSlackStorage = z.infer; + +export const ErrorAlertConfig = 
z.object({ + evaluationIntervalMs: z.number().min(60_000).default(300_000), +}); + +export type ErrorAlertConfig = z.infer; diff --git a/apps/webapp/app/presenters/v3/ErrorGroupPresenter.server.ts b/apps/webapp/app/presenters/v3/ErrorGroupPresenter.server.ts index 024ac1e95ea..fee5310e8b9 100644 --- a/apps/webapp/app/presenters/v3/ErrorGroupPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ErrorGroupPresenter.server.ts @@ -2,7 +2,7 @@ import { z } from "zod"; import { type ClickHouse, msToClickHouseInterval } from "@internal/clickhouse"; import { TimeGranularity } from "~/utils/timeGranularity"; import { ErrorId } from "@trigger.dev/core/v3/isomorphic"; -import { type PrismaClientOrTransaction } from "@trigger.dev/database"; +import { type ErrorGroupStatus, type PrismaClientOrTransaction } from "@trigger.dev/database"; import { timeFilterFromTo } from "~/components/runs/v3/SharedFilters"; import { type Direction, DirectionSchema } from "~/components/ListPagination"; import { findDisplayableEnvironment } from "~/models/runtimeEnvironment.server"; @@ -27,6 +27,7 @@ export type ErrorGroupOptions = { userId?: string; projectId: string; fingerprint: string; + versions?: string[]; runsPageSize?: number; period?: string; from?: number; @@ -39,6 +40,7 @@ export const ErrorGroupOptionsSchema = z.object({ userId: z.string().optional(), projectId: z.string(), fingerprint: z.string(), + versions: z.array(z.string()).optional(), runsPageSize: z.number().int().positive().max(1000).optional(), period: z.string().optional(), from: z.number().int().nonnegative().optional(), @@ -59,6 +61,18 @@ function parseClickHouseDateTime(value: string): Date { return new Date(value.replace(" ", "T") + "Z"); } +export type ErrorGroupState = { + status: ErrorGroupStatus; + resolvedAt: Date | null; + resolvedInVersion: string | null; + resolvedBy: string | null; + ignoredUntil: Date | null; + ignoredReason: string | null; + ignoredByUserId: string | null; + ignoredUntilOccurrenceRate: number 
| null; + ignoredUntilTotalOccurrences: number | null; +}; + export type ErrorGroupSummary = { fingerprint: string; errorType: string; @@ -68,10 +82,12 @@ export type ErrorGroupSummary = { firstSeen: Date; lastSeen: Date; affectedVersions: string[]; + state: ErrorGroupState; }; export type ErrorGroupOccurrences = Awaited>; export type ErrorGroupActivity = ErrorGroupOccurrences["data"]; +export type ErrorGroupActivityVersions = ErrorGroupOccurrences["versions"]; export class ErrorGroupPresenter extends BasePresenter { constructor( @@ -89,6 +105,7 @@ export class ErrorGroupPresenter extends BasePresenter { userId, projectId, fingerprint, + versions, runsPageSize = DEFAULT_RUNS_PAGE_SIZE, period, from, @@ -110,23 +127,36 @@ export class ErrorGroupPresenter extends BasePresenter { defaultPeriod: "7d", }); - const [summary, affectedVersions, runList] = await Promise.all([ + const [summary, affectedVersions, runList, stateRow] = await Promise.all([ this.getSummary(organizationId, projectId, environmentId, fingerprint), this.getAffectedVersions(organizationId, projectId, environmentId, fingerprint), this.getRunList(organizationId, environmentId, { userId, projectId, fingerprint, + versions, pageSize: runsPageSize, from: time.from.getTime(), to: time.to.getTime(), cursor, direction, }), + this.getState(environmentId, fingerprint), ]); if (summary) { summary.affectedVersions = affectedVersions; + summary.state = stateRow ?? { + status: "UNRESOLVED", + resolvedAt: null, + resolvedInVersion: null, + resolvedBy: null, + ignoredUntil: null, + ignoredReason: null, + ignoredByUserId: null, + ignoredUntilOccurrenceRate: null, + ignoredUntilTotalOccurrences: null, + }; } return { @@ -140,8 +170,8 @@ export class ErrorGroupPresenter extends BasePresenter { } /** - * Returns bucketed occurrence counts for a single fingerprint over a time range. - * Granularity is determined automatically from the range span. 
+ * Returns bucketed occurrence counts for a single fingerprint over a time range, + * grouped by task_version for stacked charts. */ public async getOccurrences( organizationId: string, @@ -149,14 +179,17 @@ export class ErrorGroupPresenter extends BasePresenter { environmentId: string, fingerprint: string, from: Date, - to: Date + to: Date, + versions?: string[] ): Promise<{ - data: Array<{ date: Date; count: number }>; + data: Array>; + versions: string[]; }> { const granularityMs = errorGroupGranularity.getTimeGranularityMs(from, to); const intervalExpr = msToClickHouseInterval(granularityMs); - const queryBuilder = this.logsClickhouse.errors.createOccurrencesQueryBuilder(intervalExpr); + const queryBuilder = + this.logsClickhouse.errors.createOccurrencesByVersionQueryBuilder(intervalExpr); queryBuilder.where("organization_id = {organizationId: String}", { organizationId }); queryBuilder.where("project_id = {projectId: String}", { projectId }); @@ -169,7 +202,11 @@ export class ErrorGroupPresenter extends BasePresenter { toTimeMs: to.getTime(), }); - queryBuilder.groupBy("error_fingerprint, bucket_epoch"); + if (versions && versions.length > 0) { + queryBuilder.where("task_version IN {versions: Array(String)}", { versions }); + } + + queryBuilder.groupBy("error_fingerprint, task_version, bucket_epoch"); queryBuilder.orderBy("bucket_epoch ASC"); const [queryError, records] = await queryBuilder.execute(); @@ -186,17 +223,27 @@ export class ErrorGroupPresenter extends BasePresenter { buckets.push(epoch); } - const byBucket = new Map(); + // Collect distinct versions and index results by (epoch, version) + const versionSet = new Set(); + const byBucketVersion = new Map(); for (const row of records ?? []) { - byBucket.set(row.bucket_epoch, (byBucket.get(row.bucket_epoch) ?? 
0) + row.count); + const version = row.task_version || "unknown"; + versionSet.add(version); + const key = `${row.bucket_epoch}:${version}`; + byBucketVersion.set(key, (byBucketVersion.get(key) ?? 0) + row.count); } - return { - data: buckets.map((epoch) => ({ - date: new Date(epoch * 1000), - count: byBucket.get(epoch) ?? 0, - })), - }; + const sortedVersions = sortVersionsDescending([...versionSet]); + + const data = buckets.map((epoch) => { + const point: Record = { date: new Date(epoch * 1000) }; + for (const version of sortedVersions) { + point[version] = byBucketVersion.get(`${epoch}:${version}`) ?? 0; + } + return point; + }); + + return { data, versions: sortedVersions }; } private async getSummary( @@ -268,6 +315,31 @@ export class ErrorGroupPresenter extends BasePresenter { return sortVersionsDescending(versions).slice(0, 5); } + private async getState( + environmentId: string, + fingerprint: string + ): Promise { + const row = await this.replica.errorGroupState.findFirst({ + where: { + environmentId, + errorFingerprint: fingerprint, + }, + select: { + status: true, + resolvedAt: true, + resolvedInVersion: true, + resolvedBy: true, + ignoredUntil: true, + ignoredReason: true, + ignoredByUserId: true, + ignoredUntilOccurrenceRate: true, + ignoredUntilTotalOccurrences: true, + }, + }); + + return row; + } + private async getRunList( organizationId: string, environmentId: string, @@ -275,6 +347,7 @@ export class ErrorGroupPresenter extends BasePresenter { userId?: string; projectId: string; fingerprint: string; + versions?: string[]; pageSize: number; from?: number; to?: number; @@ -289,6 +362,7 @@ export class ErrorGroupPresenter extends BasePresenter { projectId: options.projectId, rootOnly: false, errorId: ErrorId.toFriendlyId(options.fingerprint), + versions: options.versions, pageSize: options.pageSize, from: options.from, to: options.to, diff --git a/apps/webapp/app/presenters/v3/ErrorsListPresenter.server.ts 
b/apps/webapp/app/presenters/v3/ErrorsListPresenter.server.ts index 89832b28340..a8d3d6ab9f8 100644 --- a/apps/webapp/app/presenters/v3/ErrorsListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ErrorsListPresenter.server.ts @@ -9,7 +9,7 @@ const errorsListGranularity = new TimeGranularity([ { max: "3 months", granularity: "1w" }, { max: "Infinity", granularity: "30d" }, ]); -import { type PrismaClientOrTransaction } from "@trigger.dev/database"; +import { type ErrorGroupStatus, type PrismaClientOrTransaction } from "@trigger.dev/database"; import { type Direction } from "~/components/ListPagination"; import { timeFilterFromTo } from "~/components/runs/v3/SharedFilters"; import { findDisplayableEnvironment } from "~/models/runtimeEnvironment.server"; @@ -22,6 +22,8 @@ export type ErrorsListOptions = { projectId: string; // filters tasks?: string[]; + versions?: string[]; + statuses?: ErrorGroupStatus[]; period?: string; from?: number; to?: number; @@ -39,6 +41,8 @@ export const ErrorsListOptionsSchema = z.object({ userId: z.string().optional(), projectId: z.string(), tasks: z.array(z.string()).optional(), + versions: z.array(z.string()).optional(), + statuses: z.array(z.enum(["UNRESOLVED", "RESOLVED", "IGNORED"])).optional(), period: z.string().optional(), from: z.number().int().nonnegative().optional(), to: z.number().int().nonnegative().optional(), @@ -88,7 +92,11 @@ function decodeCursor(cursor: string): ErrorGroupCursor | null { } } -function cursorFromRow(row: { occurrence_count: number; error_fingerprint: string; task_identifier: string }): string { +function cursorFromRow(row: { + occurrence_count: number; + error_fingerprint: string; + task_identifier: string; +}): string { return encodeCursor({ occurrenceCount: row.occurrence_count, fingerprint: row.error_fingerprint, @@ -123,6 +131,8 @@ export class ErrorsListPresenter extends BasePresenter { userId, projectId, tasks, + versions, + statuses, period, search, from, @@ -156,7 +166,9 @@ export class 
ErrorsListPresenter extends BasePresenter { const hasFilters = (tasks !== undefined && tasks.length > 0) || + (versions !== undefined && versions.length > 0) || (search !== undefined && search !== "") || + (statuses !== undefined && statuses.length > 0) || !time.isDefault; const possibleTasksAsync = getAllTaskIdentifiers(this.replica, environmentId); @@ -189,6 +201,10 @@ export class ErrorsListPresenter extends BasePresenter { queryBuilder.where("task_identifier IN {tasks: Array(String)}", { tasks }); } + if (versions && versions.length > 0) { + queryBuilder.where("task_version IN {versions: Array(String)}", { versions }); + } + queryBuilder.groupBy("error_fingerprint, task_identifier"); // Text search via HAVING (operates on aggregated values) @@ -254,15 +270,14 @@ export class ErrorsListPresenter extends BasePresenter { // Fetch global first_seen / last_seen from the errors_v1 summary table const fingerprints = errorGroups.map((e) => e.error_fingerprint); - const globalSummaryMap = await this.getGlobalSummary( - organizationId, - projectId, - environmentId, - fingerprints - ); + const [globalSummaryMap, stateMap] = await Promise.all([ + this.getGlobalSummary(organizationId, projectId, environmentId, fingerprints), + this.getErrorGroupStates(environmentId, errorGroups), + ]); - const transformedErrorGroups = errorGroups.map((error) => { + let transformedErrorGroups = errorGroups.map((error) => { const global = globalSummaryMap.get(error.error_fingerprint); + const state = stateMap.get(`${error.task_identifier}:${error.error_fingerprint}`); return { errorType: error.error_type, errorMessage: error.error_message, @@ -271,9 +286,18 @@ export class ErrorsListPresenter extends BasePresenter { firstSeen: global?.firstSeen ?? new Date(), lastSeen: global?.lastSeen ?? new Date(), count: error.occurrence_count, + status: state?.status ?? "UNRESOLVED", + resolvedAt: state?.resolvedAt ?? null, + ignoredUntil: state?.ignoredUntil ?? 
null, }; }); + if (statuses && statuses.length > 0) { + transformedErrorGroups = transformedErrorGroups.filter((g) => + statuses.includes(g.status as ErrorGroupStatus) + ); + } + return { errorGroups: transformedErrorGroups, pagination: { @@ -282,6 +306,8 @@ export class ErrorsListPresenter extends BasePresenter { }, filters: { tasks, + versions, + statuses, search, period: time, from: effectiveFrom, @@ -367,6 +393,51 @@ export class ErrorsListPresenter extends BasePresenter { return { data }; } + /** + * Batch-fetch ErrorGroupState rows from Postgres for the given ClickHouse error groups. + * Returns a map keyed by `${taskIdentifier}:${errorFingerprint}`. + */ + private async getErrorGroupStates( + environmentId: string, + errorGroups: Array<{ task_identifier: string; error_fingerprint: string }> + ) { + type StateValue = { + status: ErrorGroupStatus; + resolvedAt: Date | null; + ignoredUntil: Date | null; + }; + + const result = new Map(); + if (errorGroups.length === 0) return result; + + const states = await this.replica.errorGroupState.findMany({ + where: { + environmentId, + OR: errorGroups.map((e) => ({ + taskIdentifier: e.task_identifier, + errorFingerprint: e.error_fingerprint, + })), + }, + select: { + taskIdentifier: true, + errorFingerprint: true, + status: true, + resolvedAt: true, + ignoredUntil: true, + }, + }); + + for (const state of states) { + result.set(`${state.taskIdentifier}:${state.errorFingerprint}`, { + status: state.status, + resolvedAt: state.resolvedAt, + ignoredUntil: state.ignoredUntil, + }); + } + + return result; + } + /** * Fetches global first_seen / last_seen for a set of fingerprints from errors_v1. 
*/ diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.alerts/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.alerts/route.tsx index 1bedd30d0f9..d93e7640d39 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.alerts/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.alerts/route.tsx @@ -63,6 +63,7 @@ import { v3NewProjectAlertPath, v3ProjectAlertsPath, } from "~/utils/pathBuilder"; +import { alertsWorker } from "~/v3/alertsWorker.server"; export const meta: MetaFunction = () => { return [ @@ -156,6 +157,17 @@ export const action = async ({ request, params }: ActionFunctionArgs) => { data: { enabled: true }, }); + if (alertChannel.alertTypes.includes("ERROR_GROUP")) { + await alertsWorker.enqueue({ + id: `evaluateErrorAlerts:${project.id}`, + job: "v3.evaluateErrorAlerts", + payload: { + projectId: project.id, + scheduledAt: Date.now(), + }, + }); + } + return redirectWithSuccessMessage( v3ProjectAlertsPath({ slug: organizationSlug }, { slug: projectParam }, { slug: envParam }), request, @@ -556,7 +568,7 @@ export function alertTypeTitle(alertType: ProjectAlertType): string { case "DEPLOYMENT_SUCCESS": return "Deployment success"; default: { - assertNever(alertType); + throw new Error(`Unknown alertType: ${alertType}`); } } } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx index 0ff8594fa36..86eb4b45725 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx @@ -1,8 +1,10 
@@ -import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; -import { type MetaFunction } from "@remix-run/react"; +import { type LoaderFunctionArgs, type ActionFunctionArgs, json } from "@remix-run/server-runtime"; +import { type MetaFunction, Form, useFetcher } from "@remix-run/react"; +import { parse } from "@conform-to/zod"; +import { z } from "zod"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { TypedAwait, typeddefer, useTypedLoaderData } from "remix-typedjson"; -import { requireUser } from "~/services/session.server"; +import { requireUser, requireUserId } from "~/services/session.server"; import { EnvironmentParamSchema, v3CreateBulkActionPath, @@ -14,15 +16,17 @@ import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { ErrorGroupPresenter, type ErrorGroupActivity, + type ErrorGroupActivityVersions, type ErrorGroupOccurrences, type ErrorGroupSummary, + type ErrorGroupState, } from "~/presenters/v3/ErrorGroupPresenter.server"; import { type NextRunList } from "~/presenters/v3/NextRunListPresenter.server"; import { $replica } from "~/db.server"; import { logsClickhouseClient, clickhouseClient } from "~/services/clickhouseInstance.server"; import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; import { PageBody } from "~/components/layout/AppLayout"; -import { Suspense, useMemo } from "react"; +import { Suspense, useMemo, useState } from "react"; import { Spinner } from "~/components/primitives/Spinner"; import { Paragraph } from "~/components/primitives/Paragraph"; import { Callout } from "~/components/primitives/Callout"; @@ -37,15 +41,32 @@ import { Chart, type ChartConfig } from "~/components/primitives/charts/ChartCom import { TimeFilter, timeFilterFromTo } from "~/components/runs/v3/SharedFilters"; import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; import { DirectionSchema, ListPagination } from "~/components/ListPagination"; -import { 
LinkButton } from "~/components/primitives/Buttons"; +import { Button, LinkButton } from "~/components/primitives/Buttons"; import { ListCheckedIcon } from "~/assets/icons/ListCheckedIcon"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; import { useEnvironment } from "~/hooks/useEnvironment"; import { RunsIcon } from "~/assets/icons/RunsIcon"; -import { TaskRunListSearchFilters } from "~/components/runs/v3/RunFilters"; +import type { TaskRunListSearchFilters } from "~/components/runs/v3/RunFilters"; import { useSearchParams } from "~/hooks/useSearchParam"; import { CopyableText } from "~/components/primitives/CopyableText"; +import { LogsVersionFilter } from "~/components/logs/LogsVersionFilter"; +import { getSeriesColor } from "~/components/code/chartColors"; +import { + Popover, + PopoverArrowTrigger, + PopoverContent, + PopoverMenuItem, +} from "~/components/primitives/Popover"; +import { + Dialog, + DialogContent, + DialogFooter, + DialogHeader, + DialogTitle, + DialogTrigger, +} from "~/components/primitives/Dialog"; +import { ErrorGroupActions } from "~/v3/services/errorGroupActions.server"; export const meta: MetaFunction = ({ data }) => { return [ @@ -55,6 +76,86 @@ export const meta: MetaFunction = ({ data }) => { ]; }; +const actionSchema = z.discriminatedUnion("action", [ + z.object({ + action: z.literal("resolve"), + taskIdentifier: z.string(), + resolvedInVersion: z.string().optional(), + }), + z.object({ + action: z.literal("ignore"), + taskIdentifier: z.string(), + duration: z.coerce.number().optional(), + occurrenceRate: z.coerce.number().optional(), + totalOccurrences: z.coerce.number().optional(), + reason: z.string().optional(), + }), + z.object({ + action: z.literal("unresolve"), + taskIdentifier: z.string(), + }), +]); + +export const action = async ({ request, params }: ActionFunctionArgs) => { + const userId = await requireUserId(request); + const { organizationSlug, projectParam, 
envParam } = EnvironmentParamSchema.parse(params); + const fingerprint = params.fingerprint; + + if (!fingerprint) { + return json({ error: "Fingerprint parameter is required" }, { status: 400 }); + } + + const project = await findProjectBySlug(organizationSlug, projectParam, userId); + if (!project) { + return json({ error: "Project not found" }, { status: 404 }); + } + + const environment = await findEnvironmentBySlug(project.id, envParam, userId); + if (!environment) { + return json({ error: "Environment not found" }, { status: 404 }); + } + + const formData = await request.formData(); + const submission = parse(formData, { schema: actionSchema }); + + if (!submission.value) { + return json(submission); + } + + const actions = new ErrorGroupActions(); + const identifier = { + organizationId: project.organizationId, + projectId: project.id, + environmentId: environment.id, + taskIdentifier: submission.value.taskIdentifier, + errorFingerprint: fingerprint, + }; + + switch (submission.value.action) { + case "resolve": { + await actions.resolveError(identifier, { + userId, + resolvedInVersion: submission.value.resolvedInVersion, + }); + return json({ ok: true }); + } + case "ignore": { + await actions.ignoreError(identifier, { + userId, + duration: submission.value.duration, + occurrenceRateThreshold: submission.value.occurrenceRate, + totalOccurrencesThreshold: submission.value.totalOccurrences, + reason: submission.value.reason, + }); + return json({ ok: true }); + } + case "unresolve": { + await actions.unresolveError(identifier); + return json({ ok: true }); + } + } +}; + export const loader = async ({ request, params }: LoaderFunctionArgs) => { const user = await requireUser(request); const userId = user.id; @@ -82,6 +183,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const toStr = url.searchParams.get("to"); const from = fromStr ? parseInt(fromStr, 10) : undefined; const to = toStr ? 
parseInt(toStr, 10) : undefined; + const versions = url.searchParams.getAll("versions").filter((v) => v.length > 0); const cursor = url.searchParams.get("cursor") ?? undefined; const directionRaw = url.searchParams.get("direction") ?? undefined; const direction = directionRaw ? DirectionSchema.parse(directionRaw) : undefined; @@ -93,6 +195,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { userId, projectId: project.id, fingerprint, + versions: versions.length > 0 ? versions : undefined, period, from, to, @@ -115,9 +218,10 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { environment.id, fingerprint, time.from, - time.to + time.to, + versions.length > 0 ? versions : undefined ) - .catch(() => ({ data: [] as ErrorGroupActivity })); + .catch(() => ({ data: [] as ErrorGroupActivity, versions: [] as string[] })); return typeddefer({ data: detailPromise, @@ -149,6 +253,9 @@ export default function Page() { if (period) carry.set("period", period); if (from) carry.set("from", from); if (to) carry.set("to", to); + for (const v of searchParams.getAll("versions")) { + if (v) carry.append("versions", v); + } const qs = carry.toString(); return qs ? `${base}?${qs}` : base; }, [organizationSlug, projectParam, envParam, searchParams.toString()]); @@ -232,7 +339,7 @@ function ErrorGroupDetail({ envParam: string; fingerprint: string; }) { - const { value } = useSearchParams(); + const { value, values } = useSearchParams(); const organization = useOrganization(); const project = useProject(); const environment = useEnvironment(); @@ -252,11 +359,13 @@ function ErrorGroupDetail({ const fromValue = value("from") ?? undefined; const toValue = value("to") ?? undefined; + const selectedVersions = values("versions").filter((v) => v !== ""); const filters: TaskRunListSearchFilters = { period: value("period") ?? undefined, from: fromValue ? parseInt(fromValue, 10) : undefined, to: toValue ? 
parseInt(toValue, 10) : undefined, + versions: selectedVersions.length > 0 ? selectedVersions : undefined, rootOnly: false, errorId: ErrorId.toFriendlyId(fingerprint), }; @@ -265,9 +374,16 @@ function ErrorGroupDetail({
{/* Error Summary */}
-
- {errorGroup.errorMessage} - {formatNumberCompact(errorGroup.count)} total occurrences +
+
+ {errorGroup.errorMessage} + {formatNumberCompact(errorGroup.count)} total occurrences +
+
@@ -318,19 +434,19 @@ function ErrorGroupDetail({ {/* Activity chart */}
-
+
+
}> }> - {(result) => - result.data.length > 0 ? ( - - ) : ( - - ) - } + {(result) => { + if (result.data.length > 0 && result.versions.length > 0) { + return ; + } + return ; + }}
@@ -369,10 +485,10 @@ function ErrorGroupDetail({ {runList ? ( 0} filters={{ tasks: [], - versions: [], + versions: selectedVersions, statuses: [], from: undefined, to: undefined, @@ -392,14 +508,205 @@ function ErrorGroupDetail({ ); } -const activityChartConfig: ChartConfig = { - count: { - label: "Occurrences", - color: "#6366F1", - }, -}; +const STATUS_BADGE_STYLES = { + UNRESOLVED: "bg-error/10 text-error", + RESOLVED: "bg-success/10 text-success", + IGNORED: "bg-text-dimmed/10 text-text-dimmed", +} as const; + +const STATUS_LABELS = { + UNRESOLVED: "Unresolved", + RESOLVED: "Resolved", + IGNORED: "Ignored", +} as const; + +function StatusBadge({ status }: { status: ErrorGroupState["status"] }) { + return ( + + {STATUS_LABELS[status]} + + ); +} + +function ErrorGroupActionButtons({ + state, + taskIdentifier, + fingerprint, +}: { + state: ErrorGroupState; + taskIdentifier: string; + fingerprint: string; +}) { + const fetcher = useFetcher(); + const [customIgnoreOpen, setCustomIgnoreOpen] = useState(false); + const isSubmitting = fetcher.state !== "idle"; + + const submitAction = (data: Record) => { + fetcher.submit({ ...data, taskIdentifier }, { method: "post" }); + }; + + return ( +
+ + + {state.status === "UNRESOLVED" && ( + <> + + + Ignore + + submitAction({ action: "ignore", duration: String(60 * 60 * 1000) })} + /> + + submitAction({ + action: "ignore", + duration: String(24 * 60 * 60 * 1000), + }) + } + /> + submitAction({ action: "ignore" })} + /> + setCustomIgnoreOpen(true)} + /> + + + + )} + + {(state.status === "RESOLVED" || state.status === "IGNORED") && ( + + )} + + + + + Custom ignore condition + + setCustomIgnoreOpen(false)} + /> + + +
+ ); +} + +function CustomIgnoreForm({ + taskIdentifier, + onClose, +}: { + taskIdentifier: string; + onClose: () => void; +}) { + const fetcher = useFetcher(); + const isSubmitting = fetcher.state !== "idle"; + + return ( + { + setTimeout(onClose, 100); + }} + > + + + +
+
+ + +
+ +
+ + +
+ +
+ + +
+
+ + + + + +
+ ); +} + +function ActivityChart({ + activity, + versions, +}: { + activity: ErrorGroupActivity; + versions: ErrorGroupActivityVersions; +}) { + const chartConfig = useMemo(() => { + const cfg: ChartConfig = {}; + for (let i = 0; i < versions.length; i++) { + cfg[versions[i]] = { + label: versions[i], + color: getSeriesColor(i), + }; + } + return cfg; + }, [versions]); -function ActivityChart({ activity }: { activity: ErrorGroupActivity }) { const data = useMemo( () => activity.map((d) => ({ @@ -453,13 +760,14 @@ function ActivityChart({ activity }: { activity: ErrorGroupActivity }) { return ( { const url = new URL(request.url); const tasks = url.searchParams.getAll("tasks").filter((t) => t.length > 0); + const versions = url.searchParams.getAll("versions").filter((v) => v.length > 0); + const statuses = url.searchParams + .getAll("status") + .filter( + (s): s is ErrorGroupStatus => s === "UNRESOLVED" || s === "RESOLVED" || s === "IGNORED" + ); const search = url.searchParams.get("search") ?? undefined; const period = url.searchParams.get("period") ?? undefined; const fromStr = url.searchParams.get("from"); @@ -101,6 +121,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { userId, projectId: project.id, tasks: tasks.length > 0 ? tasks : undefined, + versions: versions.length > 0 ? versions : undefined, + statuses: statuses.length > 0 ? 
statuses : undefined, search, period, from, @@ -226,6 +248,120 @@ export default function Page() { ); } +const errorStatusOptions = [ + { value: "UNRESOLVED", label: "Unresolved" }, + { value: "RESOLVED", label: "Resolved" }, + { value: "IGNORED", label: "Ignored" }, +] as const; + +const statusIcon = ; +const statusShortcut = { key: "s" }; + +function StatusFilter() { + const { values, del } = useSearchParams(); + const selectedStatuses = values("status"); + + if (selectedStatuses.length === 0 || selectedStatuses.every((v) => v === "")) { + return ( + + {(search, setSearch) => ( + + Status + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); + } + + return ( + + {(search, setSearch) => ( + }> + { + const opt = errorStatusOptions.find((o) => o.value === s); + return opt ? opt.label : s; + }) + )} + onRemove={() => del(["status", "cursor", "direction"])} + variant="secondary/small" + /> + + } + searchValue={search} + clearSearchValue={() => setSearch("")} + /> + )} + + ); +} + +function ErrorStatusDropdown({ + trigger, + clearSearchValue, + searchValue, + onClose, +}: { + trigger: ReactNode; + clearSearchValue: () => void; + searchValue: string; + onClose?: () => void; +}) { + const { values, replace } = useSearchParams(); + + const handleChange = (values: string[]) => { + clearSearchValue(); + replace({ status: values.length > 0 ? 
values : undefined, cursor: undefined, direction: undefined }); + }; + + const filtered = useMemo(() => { + return errorStatusOptions.filter((item) => + item.label.toLowerCase().includes(searchValue.toLowerCase()) + ); + }, [searchValue]); + + return ( + + {trigger} + { + if (onClose) { + onClose(); + return false; + } + return true; + }} + > + + + {filtered.map((item) => ( + + {item.label} + + ))} + + + + ); +} + function FiltersBar({ list, defaultPeriod, @@ -238,7 +374,9 @@ function FiltersBar({ const location = useOptimisticLocation(); const searchParams = new URLSearchParams(location.search); const hasFilters = + searchParams.has("status") || searchParams.has("tasks") || + searchParams.has("versions") || searchParams.has("search") || searchParams.has("period") || searchParams.has("from") || @@ -249,7 +387,9 @@ function FiltersBar({
{list ? ( <> + + ) : ( <> + + {hasFilters && ( @@ -319,6 +461,7 @@ function ErrorsList({ ID + Status Task Error Occurrences @@ -373,6 +516,9 @@ function ErrorGroupRow({ if (period) carry.set("period", period); if (from) carry.set("from", from); if (to) carry.set("to", to); + for (const v of searchParams.getAll("versions")) { + if (v) carry.append("versions", v); + } const qs = carry.toString(); return qs ? `${base}?${qs}` : base; }, [organizationSlug, projectParam, envParam, errorGroup.fingerprint, searchParams.toString()]); @@ -384,9 +530,12 @@ function ErrorGroupRow({ {errorGroup.fingerprint.slice(-8)} + + + {errorGroup.taskIdentifier} - {errorMessage} + {errorMessage.length > 128 ? `${errorMessage.slice(0, 128)}…` : errorMessage} {errorGroup.count.toLocaleString()} @@ -413,6 +562,27 @@ function ErrorGroupRow({ ); } +const LIST_STATUS_STYLES = { + UNRESOLVED: "text-error", + RESOLVED: "text-success", + IGNORED: "text-text-dimmed", +} as const; + +const LIST_STATUS_LABELS = { + UNRESOLVED: "Unresolved", + RESOLVED: "Resolved", + IGNORED: "Ignored", +} as const; + +function ListStatusBadge({ status }: { status: string }) { + const s = (status as keyof typeof LIST_STATUS_STYLES) ?? "UNRESOLVED"; + return ( + + {LIST_STATUS_LABELS[s] ?? 
"Unresolved"} + + ); +} + function ErrorActivityGraph({ activity }: { activity: ErrorOccurrenceActivity }) { const maxCount = Math.max(...activity.map((d) => d.count)); diff --git a/apps/webapp/app/v3/alertsWorker.server.ts b/apps/webapp/app/v3/alertsWorker.server.ts index 46670887a75..e8cd6abee73 100644 --- a/apps/webapp/app/v3/alertsWorker.server.ts +++ b/apps/webapp/app/v3/alertsWorker.server.ts @@ -1,10 +1,12 @@ import { Logger } from "@trigger.dev/core/logger"; -import { Worker as RedisWorker } from "@trigger.dev/redis-worker"; +import { CronSchema, Worker as RedisWorker } from "@trigger.dev/redis-worker"; import { z } from "zod"; import { env } from "~/env.server"; import { logger } from "~/services/logger.server"; import { singleton } from "~/utils/singleton"; import { DeliverAlertService } from "./services/alerts/deliverAlert.server"; +import { DeliverErrorGroupAlertService } from "./services/alerts/deliverErrorGroupAlert.server"; +import { ErrorAlertEvaluator } from "./services/alerts/errorAlertEvaluator.server"; import { PerformDeploymentAlertsService } from "./services/alerts/performDeploymentAlerts.server"; import { PerformTaskRunAlertsService } from "./services/alerts/performTaskRunAlerts.server"; @@ -55,6 +57,41 @@ function initializeWorker() { }, logErrors: false, }, + "v3.evaluateErrorAlerts": { + schema: z.object({ + projectId: z.string(), + scheduledAt: z.number(), + }), + visibilityTimeoutMs: 60_000 * 5, + retry: { + maxAttempts: 3, + }, + logErrors: true, + }, + "v3.deliverErrorGroupAlert": { + schema: z.object({ + channelId: z.string(), + projectId: z.string(), + classification: z.enum(["new_issue", "regression", "unignored"]), + error: z.object({ + fingerprint: z.string(), + environmentId: z.string(), + environmentName: z.string(), + taskIdentifier: z.string(), + errorType: z.string(), + errorMessage: z.string(), + sampleStackTrace: z.string(), + firstSeen: z.string(), + lastSeen: z.string(), + occurrenceCount: z.number(), + }), + }), + 
visibilityTimeoutMs: 60_000, + retry: { + maxAttempts: 3, + }, + logErrors: true, + }, }, concurrency: { workers: env.ALERTS_WORKER_CONCURRENCY_WORKERS, @@ -80,6 +117,14 @@ function initializeWorker() { const service = new PerformTaskRunAlertsService(); await service.call(payload.runId); }, + "v3.evaluateErrorAlerts": async ({ payload }) => { + const evaluator = new ErrorAlertEvaluator(); + await evaluator.evaluate(payload.projectId, payload.scheduledAt); + }, + "v3.deliverErrorGroupAlert": async ({ payload }) => { + const service = new DeliverErrorGroupAlertService(); + await service.call(payload); + }, }, }); diff --git a/apps/webapp/app/v3/services/alerts/createAlertChannel.server.ts b/apps/webapp/app/v3/services/alerts/createAlertChannel.server.ts index b2bbb423983..c87218f2bfc 100644 --- a/apps/webapp/app/v3/services/alerts/createAlertChannel.server.ts +++ b/apps/webapp/app/v3/services/alerts/createAlertChannel.server.ts @@ -7,6 +7,7 @@ import { nanoid } from "nanoid"; import { env } from "~/env.server"; import { findProjectByRef } from "~/models/project.server"; import { encryptSecret } from "~/services/secrets/secretStore.server"; +import { alertsWorker } from "~/v3/alertsWorker.server"; import { generateFriendlyId } from "~/v3/friendlyIdentifiers"; import { BaseService, ServiceValidationError } from "../baseService.server"; @@ -60,7 +61,7 @@ export class CreateAlertChannelService extends BaseService { : undefined; if (existingAlertChannel) { - return await this._prisma.projectAlertChannel.update({ + const updated = await this._prisma.projectAlertChannel.update({ where: { id: existingAlertChannel.id }, data: { name: options.name, @@ -70,6 +71,12 @@ export class CreateAlertChannelService extends BaseService { environmentTypes, }, }); + + if (options.alertTypes.includes("ERROR_GROUP")) { + await this.#scheduleErrorAlertEvaluation(project.id); + } + + return updated; } const alertChannel = await this._prisma.projectAlertChannel.create({ @@ -87,9 +94,24 @@ 
export class CreateAlertChannelService extends BaseService { }, }); + if (options.alertTypes.includes("ERROR_GROUP")) { + await this.#scheduleErrorAlertEvaluation(project.id); + } + return alertChannel; } + async #scheduleErrorAlertEvaluation(projectId: string): Promise { + await alertsWorker.enqueue({ + id: `evaluateErrorAlerts:${projectId}`, + job: "v3.evaluateErrorAlerts", + payload: { + projectId, + scheduledAt: Date.now(), + }, + }); + } + async #createProperties(channel: CreateAlertChannelOptions["channel"]) { switch (channel.type) { case "EMAIL": diff --git a/apps/webapp/app/v3/services/alerts/deliverErrorGroupAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverErrorGroupAlert.server.ts new file mode 100644 index 00000000000..a04ed49a00b --- /dev/null +++ b/apps/webapp/app/v3/services/alerts/deliverErrorGroupAlert.server.ts @@ -0,0 +1,380 @@ +import { + type ChatPostMessageArguments, + ErrorCode, + type WebAPIHTTPError, + type WebAPIPlatformError, + type WebAPIRateLimitedError, + type WebAPIRequestError, +} from "@slack/web-api"; +import { type ProjectAlertChannelType } from "@trigger.dev/database"; +import assertNever from "assert-never"; +import { prisma } from "~/db.server"; +import { env } from "~/env.server"; +import { + isIntegrationForService, + type OrganizationIntegrationForService, + OrgIntegrationRepository, +} from "~/models/orgIntegration.server"; +import { + ProjectAlertEmailProperties, + ProjectAlertSlackProperties, + ProjectAlertWebhookProperties, +} from "~/models/projectAlert.server"; +import { sendAlertEmail } from "~/services/email.server"; +import { logger } from "~/services/logger.server"; +import { decryptSecret } from "~/services/secrets/secretStore.server"; +import { subtle } from "crypto"; + +type ErrorAlertClassification = "new_issue" | "regression" | "unignored"; + +interface ErrorAlertPayload { + channelId: string; + projectId: string; + classification: ErrorAlertClassification; + error: { + fingerprint: string; + 
environmentId: string; + environmentName: string; + taskIdentifier: string; + errorType: string; + errorMessage: string; + sampleStackTrace: string; + firstSeen: string; + lastSeen: string; + occurrenceCount: number; + }; +} + +class SkipRetryError extends Error {} + +export class DeliverErrorGroupAlertService { + async call(payload: ErrorAlertPayload): Promise { + const channel = await prisma.projectAlertChannel.findFirst({ + where: { id: payload.channelId, enabled: true }, + include: { + project: { + include: { + organization: true, + }, + }, + }, + }); + + if (!channel) { + logger.warn("[DeliverErrorGroupAlert] Channel not found or disabled", { + channelId: payload.channelId, + }); + return; + } + + const errorLink = this.#buildErrorLink(channel.project, payload.error); + + try { + switch (channel.type) { + case "EMAIL": + await this.#sendEmail(channel, payload, errorLink); + break; + case "SLACK": + await this.#sendSlack(channel, payload, errorLink); + break; + case "WEBHOOK": + await this.#sendWebhook(channel, payload, errorLink); + break; + default: + assertNever(channel.type); + } + } catch (error) { + if (error instanceof SkipRetryError) { + logger.warn("[DeliverErrorGroupAlert] Skipping retry", { reason: (error as Error).message }); + return; + } + throw error; + } + } + + #buildErrorLink( + project: { externalRef: string }, + error: ErrorAlertPayload["error"] + ): string { + return `${env.APP_ORIGIN}/projects/v3/${project.externalRef}/errors/${error.fingerprint}`; + } + + #classificationLabel(classification: ErrorAlertClassification): string { + switch (classification) { + case "new_issue": + return "New error"; + case "regression": + return "Regression"; + case "unignored": + return "Error resurfaced"; + } + } + + async #sendEmail( + channel: { type: ProjectAlertChannelType; properties: unknown; project: { name: string; organization: { title: string } } }, + payload: ErrorAlertPayload, + errorLink: string + ): Promise { + const emailProperties = 
ProjectAlertEmailProperties.safeParse(channel.properties); + if (!emailProperties.success) { + logger.error("[DeliverErrorGroupAlert] Failed to parse email properties", { + issues: emailProperties.error.issues, + }); + return; + } + + await sendAlertEmail({ + email: "alert-error-group", + to: emailProperties.data.email, + classification: payload.classification, + taskIdentifier: payload.error.taskIdentifier, + environment: payload.error.environmentName, + error: { + message: payload.error.errorMessage, + type: payload.error.errorType, + stackTrace: payload.error.sampleStackTrace || undefined, + }, + occurrenceCount: payload.error.occurrenceCount, + errorLink, + organization: channel.project.organization.title, + project: channel.project.name, + }); + } + + async #sendSlack( + channel: { + type: ProjectAlertChannelType; + properties: unknown; + project: { organizationId: string; name: string; organization: { title: string } }; + }, + payload: ErrorAlertPayload, + errorLink: string + ): Promise { + const slackProperties = ProjectAlertSlackProperties.safeParse(channel.properties); + if (!slackProperties.success) { + logger.error("[DeliverErrorGroupAlert] Failed to parse slack properties", { + issues: slackProperties.error.issues, + }); + return; + } + + const integration = slackProperties.data.integrationId + ? 
await prisma.organizationIntegration.findFirst({ + where: { + id: slackProperties.data.integrationId, + organizationId: channel.project.organizationId, + }, + include: { tokenReference: true }, + }) + : await prisma.organizationIntegration.findFirst({ + where: { + service: "SLACK", + organizationId: channel.project.organizationId, + }, + orderBy: { createdAt: "desc" }, + include: { tokenReference: true }, + }); + + if (!integration || !isIntegrationForService(integration, "SLACK")) { + logger.error("[DeliverErrorGroupAlert] Slack integration not found"); + return; + } + + const label = this.#classificationLabel(payload.classification); + const errorType = payload.error.errorType || "Error"; + const task = payload.error.taskIdentifier; + const envName = payload.error.environmentName; + + const emoji = + payload.classification === "new_issue" + ? ":rotating_light:" + : payload.classification === "regression" + ? ":warning:" + : ":bell:"; + + await this.#postSlackMessage(integration, { + channel: slackProperties.data.channelId, + text: `${label}: ${errorType} in ${task} [${envName}]`, + blocks: [ + { + type: "section", + text: { + type: "mrkdwn", + text: `${emoji} *${label}* in *${task}* [${envName}]`, + }, + }, + { + type: "section", + text: { + type: "mrkdwn", + text: this.#wrapInCodeBlock( + payload.error.sampleStackTrace || payload.error.errorMessage + ), + }, + }, + { + type: "context", + elements: [ + { + type: "mrkdwn", + text: `> *${task}* | ${envName} | ${channel.project.name}\n> ${payload.error.occurrenceCount} occurrences | ${this.#formatTimestamp(new Date(Number(payload.error.lastSeen)))}`, + }, + ], + }, + { + type: "actions", + elements: [ + { + type: "button", + text: { type: "plain_text", text: "Investigate" }, + url: errorLink, + }, + ], + }, + ], + }); + } + + async #sendWebhook( + channel: { + type: ProjectAlertChannelType; + properties: unknown; + project: { id: string; externalRef: string; slug: string; name: string; organizationId: string; 
organization: { slug: string; title: string } }; + }, + payload: ErrorAlertPayload, + errorLink: string + ): Promise { + const webhookProperties = ProjectAlertWebhookProperties.safeParse(channel.properties); + if (!webhookProperties.success) { + logger.error("[DeliverErrorGroupAlert] Failed to parse webhook properties", { + issues: webhookProperties.error.issues, + }); + return; + } + + const webhookPayload = { + type: "alert.error_group" as const, + classification: payload.classification, + error: { + fingerprint: payload.error.fingerprint, + type: payload.error.errorType, + message: payload.error.errorMessage, + stackTrace: payload.error.sampleStackTrace || undefined, + firstSeen: payload.error.firstSeen, + lastSeen: payload.error.lastSeen, + occurrenceCount: payload.error.occurrenceCount, + taskIdentifier: payload.error.taskIdentifier, + }, + environment: { + id: payload.error.environmentId, + name: payload.error.environmentName, + }, + organization: { + id: channel.project.organizationId, + slug: channel.project.organization.slug, + name: channel.project.organization.title, + }, + project: { + id: channel.project.id, + ref: channel.project.externalRef, + slug: channel.project.slug, + name: channel.project.name, + }, + dashboardUrl: errorLink, + }; + + const rawPayload = JSON.stringify(webhookPayload); + const hashPayload = Buffer.from(rawPayload, "utf-8"); + const secret = await decryptSecret(env.ENCRYPTION_KEY, webhookProperties.data.secret); + const hmacSecret = Buffer.from(secret, "utf-8"); + const key = await subtle.importKey( + "raw", + hmacSecret, + { name: "HMAC", hash: "SHA-256" }, + false, + ["sign"] + ); + const signature = await subtle.sign("HMAC", key, hashPayload); + const signatureHex = Buffer.from(signature).toString("hex"); + + const response = await fetch(webhookProperties.data.url, { + method: "POST", + headers: { + "content-type": "application/json", + "x-trigger-signature-hmacsha256": signatureHex, + }, + body: rawPayload, + signal: 
AbortSignal.timeout(5000), + }); + + if (!response.ok) { + logger.info("[DeliverErrorGroupAlert] Failed to send webhook", { + status: response.status, + statusText: response.statusText, + url: webhookProperties.data.url, + }); + throw new Error(`Failed to send error group alert webhook to ${webhookProperties.data.url}`); + } + } + + async #postSlackMessage( + integration: OrganizationIntegrationForService<"SLACK">, + message: ChatPostMessageArguments + ) { + const client = await OrgIntegrationRepository.getAuthenticatedClientForIntegration( + integration, + { forceBotToken: true } + ); + + try { + return await client.chat.postMessage({ + ...message, + unfurl_links: false, + unfurl_media: false, + }); + } catch (error) { + if (isWebAPIRateLimitedError(error)) { + throw new Error("Slack rate limited"); + } + if (isWebAPIPlatformError(error)) { + if ( + (error as WebAPIPlatformError).data.error === "invalid_blocks" || + (error as WebAPIPlatformError).data.error === "account_inactive" + ) { + throw new SkipRetryError(`Slack: ${(error as WebAPIPlatformError).data.error}`); + } + throw new Error("Slack platform error"); + } + throw error; + } + } + + #wrapInCodeBlock(text: string, maxLength = 3000) { + const truncated = + text.length > maxLength - 10 + ? 
text.slice(0, maxLength - 10 - 50) + + "\n\ntruncated - check dashboard for complete error message" + : text; + return `\`\`\`${truncated}\`\`\``; + } + + #formatTimestamp(date: Date): string { + return new Intl.DateTimeFormat("en-US", { + month: "short", + day: "numeric", + year: "numeric", + hour: "numeric", + minute: "2-digit", + second: "2-digit", + hour12: true, + }).format(date); + } +} + +function isWebAPIPlatformError(error: unknown): error is WebAPIPlatformError { + return (error as WebAPIPlatformError).code === ErrorCode.PlatformError; +} + +function isWebAPIRateLimitedError(error: unknown): error is WebAPIRateLimitedError { + return (error as WebAPIRateLimitedError).code === ErrorCode.RateLimitedError; +} diff --git a/apps/webapp/app/v3/services/alerts/errorAlertEvaluator.server.ts b/apps/webapp/app/v3/services/alerts/errorAlertEvaluator.server.ts new file mode 100644 index 00000000000..a2b79ed1eed --- /dev/null +++ b/apps/webapp/app/v3/services/alerts/errorAlertEvaluator.server.ts @@ -0,0 +1,406 @@ +import { type ActiveErrorsSinceQueryResult, type ClickHouse } from "@internal/clickhouse"; +import { + type ErrorGroupState, + type PrismaClientOrTransaction, + type ProjectAlertChannel, + type RuntimeEnvironmentType, +} from "@trigger.dev/database"; +import { $replica, prisma } from "~/db.server"; +import { ErrorAlertConfig } from "~/models/projectAlert.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { logger } from "~/services/logger.server"; +import { alertsWorker } from "~/v3/alertsWorker.server"; + +type ErrorClassification = "new_issue" | "regression" | "unignored"; + +interface AlertableError { + classification: ErrorClassification; + error: ActiveErrorsSinceQueryResult; + environmentName: string; +} + +interface ResolvedEnvironment { + id: string; + type: RuntimeEnvironmentType; + displayName: string; +} + +const DEFAULT_INTERVAL_MS = 300_000; + +export class ErrorAlertEvaluator { + constructor( + protected 
readonly _prisma: PrismaClientOrTransaction = prisma, /* primary client: used for the errorGroupState write */ + protected readonly _replica: PrismaClientOrTransaction = $replica, /* read replica: used for the findMany lookups */ + protected readonly _clickhouse: ClickHouse = clickhouseClient + ) {} + + async evaluate(projectId: string, scheduledAt: number): Promise { + const nextScheduledAt = Date.now(); + + const channels = await this.resolveChannels(projectId); + if (channels.length === 0) { + logger.info("[ErrorAlertEvaluator] No active ERROR_GROUP channels, self-terminating", { + projectId, + }); + return; + } + + const minIntervalMs = this.computeMinInterval(channels); + const allEnvTypes = this.collectEnvironmentTypes(channels); + const environments = await this.resolveEnvironments(projectId, allEnvTypes); + + if (environments.length === 0) { + logger.info("[ErrorAlertEvaluator] No matching environments found", { projectId }); + await this.selfChain(projectId, nextScheduledAt, minIntervalMs); + return; + } + + const envIds = environments.map((e) => e.id); + const envMap = new Map(environments.map((e) => [e.id, e])); + const channelsByEnvId = this.buildChannelsByEnvId(channels, environments); + + const activeErrors = await this.getActiveErrors(projectId, envIds, scheduledAt); + + if (activeErrors.length === 0) { + await this.selfChain(projectId, nextScheduledAt, minIntervalMs); + return; + } + + const states = await this.getErrorGroupStates(projectId, activeErrors, envIds); + const stateMap = this.buildStateMap(states); + + const occurrenceCounts = await this.getOccurrenceCountsSince(projectId, envIds, scheduledAt); + const occurrenceMap = this.buildOccurrenceMap(occurrenceCounts); + + const alertableErrors: AlertableError[] = []; + + for (const error of activeErrors) { + const key = `${error.environment_id}:${error.task_identifier}:${error.error_fingerprint}`; + const state = stateMap.get(key); + const env = envMap.get(error.environment_id); + const firstSeenMs = Number(error.first_seen); + + const classification = this.classifyError(error, state, firstSeenMs,
scheduledAt, { + occurrencesSince: occurrenceMap.get(key) ?? 0, + windowMs: nextScheduledAt - scheduledAt, + }); + + if (classification) { + alertableErrors.push({ + classification, + error, + environmentName: env?.displayName ?? error.environment_id, + }); + } + } + + const stateUpdates = alertableErrors.filter( + (a) => a.classification === "regression" || a.classification === "unignored" + ); + await this.updateErrorGroupStates(stateUpdates, stateMap); + + for (const alertable of alertableErrors) { + const envChannels = channelsByEnvId.get(alertable.error.environment_id) ?? []; + for (const channel of envChannels) { + await alertsWorker.enqueue({ + job: "v3.deliverErrorGroupAlert", + payload: { + channelId: channel.id, + projectId, + classification: alertable.classification, + error: { + fingerprint: alertable.error.error_fingerprint, + environmentId: alertable.error.environment_id, + environmentName: alertable.environmentName, + taskIdentifier: alertable.error.task_identifier, + errorType: alertable.error.error_type, + errorMessage: alertable.error.error_message, + sampleStackTrace: alertable.error.sample_stack_trace, + firstSeen: alertable.error.first_seen, + lastSeen: alertable.error.last_seen, + occurrenceCount: alertable.error.occurrence_count, + }, + }, + }); + } + } + + logger.info("[ErrorAlertEvaluator] Evaluation complete", { + projectId, + activeErrors: activeErrors.length, + alertableErrors: alertableErrors.length, + deliveryJobsEnqueued: alertableErrors.reduce( + (sum, a) => sum + (channelsByEnvId.get(a.error.environment_id)?.length ?? 0), + 0 + ), + }); + + await this.selfChain(projectId, nextScheduledAt, minIntervalMs); + } + + private classifyError( + error: ActiveErrorsSinceQueryResult, + state: ErrorGroupState | undefined, + firstSeenMs: number, + scheduledAt: number, + thresholdContext: { occurrencesSince: number; windowMs: number } + ): ErrorClassification | null { + if (!state) { + return firstSeenMs > scheduledAt ? 
"new_issue" : null; + } + + switch (state.status) { + case "UNRESOLVED": + return null; + + case "RESOLVED": { + if (!state.resolvedAt) return null; + const lastSeenMs = Number(error.last_seen); + return lastSeenMs > state.resolvedAt.getTime() ? "regression" : null; + } + + case "IGNORED": + return this.isIgnoreBreached(state, thresholdContext) ? "unignored" : null; + + default: + return null; + } + } + + private isIgnoreBreached( + state: ErrorGroupState, + context: { occurrencesSince: number; windowMs: number } + ): boolean { + if (state.ignoredUntil && state.ignoredUntil.getTime() <= Date.now()) { + return true; + } + + if ( + state.ignoredUntilOccurrenceRate !== null && + state.ignoredUntilOccurrenceRate !== undefined + ) { + const windowMinutes = Math.max(context.windowMs / 60_000, 1); + const rate = context.occurrencesSince / windowMinutes; + if (rate > state.ignoredUntilOccurrenceRate) { + return true; + } + } + + if ( + state.ignoredUntilTotalOccurrences !== null && + state.ignoredUntilTotalOccurrences !== undefined && + state.ignoredAt + ) { + if (context.occurrencesSince >= state.ignoredUntilTotalOccurrences) { + return true; + } + } + + return false; + } + + private async resolveChannels(projectId: string): Promise { + return this._replica.projectAlertChannel.findMany({ + where: { + projectId, + alertTypes: { has: "ERROR_GROUP" }, + enabled: true, + }, + }); + } + + private computeMinInterval(channels: ProjectAlertChannel[]): number { + let min = DEFAULT_INTERVAL_MS; + for (const ch of channels) { + const config = ErrorAlertConfig.safeParse(ch.errorAlertConfig); + if (config.success) { + min = Math.min(min, config.data.evaluationIntervalMs); + } + } + return min; + } + + private collectEnvironmentTypes(channels: ProjectAlertChannel[]): RuntimeEnvironmentType[] { + const types = new Set(); + for (const ch of channels) { + for (const t of ch.environmentTypes) { + types.add(t); + } + } + return Array.from(types); + } + + private async resolveEnvironments( + 
projectId: string, + types: RuntimeEnvironmentType[] + ): Promise { + const envs = await this._replica.runtimeEnvironment.findMany({ + where: { + projectId, + type: { in: types }, + }, + select: { + id: true, + type: true, + slug: true, + branchName: true, + }, + }); + + return envs.map((e) => ({ + id: e.id, + type: e.type, + displayName: e.branchName ?? e.slug, + })); + } + + private buildChannelsByEnvId( + channels: ProjectAlertChannel[], + environments: ResolvedEnvironment[] + ): Map { + const result = new Map(); + for (const env of environments) { + const matching = channels.filter((ch) => ch.environmentTypes.includes(env.type)); + if (matching.length > 0) { + result.set(env.id, matching); + } + } + return result; + } + + private async getActiveErrors( + projectId: string, + envIds: string[], + scheduledAt: number + ): Promise { + const qb = this._clickhouse.errors.activeErrorsSinceQueryBuilder(); + qb.where("project_id = {projectId: String}", { projectId }); + qb.where("environment_id IN {envIds: Array(String)}", { envIds }); + qb.groupBy("environment_id, task_identifier, error_fingerprint"); + qb.having("max(last_seen) > fromUnixTimestamp64Milli({scheduledAt: Int64})", { + scheduledAt, + }); + + const [err, results] = await qb.execute(); + if (err) { + logger.error("[ErrorAlertEvaluator] Failed to query active errors", { error: err }); + return []; + } + return results ?? 
[]; + } + + private async getErrorGroupStates( + projectId: string, + activeErrors: ActiveErrorsSinceQueryResult[], + envIds: string[] + ): Promise { + const fingerprints = [...new Set(activeErrors.map((e) => e.error_fingerprint))]; + if (fingerprints.length === 0) return []; + + return this._replica.errorGroupState.findMany({ + where: { + projectId, + errorFingerprint: { in: fingerprints }, + environmentId: { in: envIds }, + }, + }); + } + + private buildStateMap(states: ErrorGroupState[]): Map { + const map = new Map(); + for (const s of states) { + map.set(`${s.environmentId}:${s.taskIdentifier}:${s.errorFingerprint}`, s); + } + return map; + } + + private async getOccurrenceCountsSince( + projectId: string, + envIds: string[], + scheduledAt: number + ): Promise< + Array<{ + environment_id: string; + task_identifier: string; + error_fingerprint: string; + occurrences_since: number; + }> + > { + const qb = this._clickhouse.errors.occurrenceCountsSinceQueryBuilder(); + qb.where("project_id = {projectId: String}", { projectId }); + qb.where("environment_id IN {envIds: Array(String)}", { envIds }); + qb.where("minute >= toStartOfMinute(fromUnixTimestamp64Milli({scheduledAt: Int64}))", { + scheduledAt, + }); + qb.groupBy("environment_id, task_identifier, error_fingerprint"); + + const [err, results] = await qb.execute(); + if (err) { + logger.error("[ErrorAlertEvaluator] Failed to query occurrence counts", { error: err }); + return []; + } + return results ?? 
[]; + } + + private buildOccurrenceMap( + counts: Array<{ + environment_id: string; + task_identifier: string; + error_fingerprint: string; + occurrences_since: number; + }> + ): Map { + const map = new Map(); + for (const c of counts) { + map.set( + `${c.environment_id}:${c.task_identifier}:${c.error_fingerprint}`, + c.occurrences_since + ); + } + return map; + } + + private async updateErrorGroupStates( + alertableErrors: AlertableError[], + stateMap: Map + ): Promise { + for (const alertable of alertableErrors) { + const key = `${alertable.error.environment_id}:${alertable.error.task_identifier}:${alertable.error.error_fingerprint}`; + const state = stateMap.get(key); + if (!state) continue; + + /* Await the update itself so the state reset runs and any failure propagates to the caller. */ + await this._prisma.errorGroupState.update({ + where: { id: state.id }, + data: { + status: "UNRESOLVED", + ignoredUntil: null, + ignoredUntilOccurrenceRate: null, + ignoredUntilTotalOccurrences: null, + ignoredAt: null, + ignoredReason: null, + ignoredByUserId: null, + resolvedAt: null, + resolvedInVersion: null, + resolvedBy: null, + }, + }); + } + } + + private async selfChain( + projectId: string, + nextScheduledAt: number, + intervalMs: number + ): Promise { + await alertsWorker.enqueue({ + id: `evaluateErrorAlerts:${projectId}`, + job: "v3.evaluateErrorAlerts", + payload: { + projectId, + scheduledAt: nextScheduledAt, + }, + availableAt: new Date(nextScheduledAt + intervalMs), + }); + } +} diff --git a/apps/webapp/app/v3/services/errorGroupActions.server.ts b/apps/webapp/app/v3/services/errorGroupActions.server.ts new file mode 100644 index 00000000000..5919e84b5e4 --- /dev/null +++ b/apps/webapp/app/v3/services/errorGroupActions.server.ts @@ -0,0 +1,140 @@ +import { type PrismaClientOrTransaction, prisma } from "~/db.server"; + +type ErrorGroupIdentifier = { + organizationId: string; + projectId: string; + environmentId: string; + taskIdentifier: string; + errorFingerprint: string; +}; + +export class ErrorGroupActions { + constructor(private readonly
_prisma: PrismaClientOrTransaction = prisma) {} + + async resolveError( + identifier: ErrorGroupIdentifier, + params: { + userId: string; + resolvedInVersion?: string; + } + ) { + const where = { + environmentId_taskIdentifier_errorFingerprint: { + environmentId: identifier.environmentId, + taskIdentifier: identifier.taskIdentifier, + errorFingerprint: identifier.errorFingerprint, + }, + }; + + const now = new Date(); + + return this._prisma.errorGroupState.upsert({ + where, + update: { + status: "RESOLVED", + resolvedAt: now, + resolvedInVersion: params.resolvedInVersion ?? null, + resolvedBy: params.userId, + ignoredUntil: null, + ignoredUntilOccurrenceRate: null, + ignoredUntilTotalOccurrences: null, + ignoredAt: null, + ignoredReason: null, + ignoredByUserId: null, + }, + create: { + organizationId: identifier.organizationId, + projectId: identifier.projectId, + environmentId: identifier.environmentId, + taskIdentifier: identifier.taskIdentifier, + errorFingerprint: identifier.errorFingerprint, + status: "RESOLVED", + resolvedAt: now, + resolvedInVersion: params.resolvedInVersion ?? null, + resolvedBy: params.userId, + }, + }); + } + + async ignoreError( + identifier: ErrorGroupIdentifier, + params: { + userId: string; + duration?: number; + occurrenceRateThreshold?: number; + totalOccurrencesThreshold?: number; + reason?: string; + } + ) { + const where = { + environmentId_taskIdentifier_errorFingerprint: { + environmentId: identifier.environmentId, + taskIdentifier: identifier.taskIdentifier, + errorFingerprint: identifier.errorFingerprint, + }, + }; + + const now = new Date(); + const ignoredUntil = params.duration ? new Date(now.getTime() + params.duration) : null; + + const data = { + status: "IGNORED" as const, + ignoredAt: now, + ignoredUntil, + ignoredUntilOccurrenceRate: params.occurrenceRateThreshold ?? null, + ignoredUntilTotalOccurrences: params.totalOccurrencesThreshold ?? null, + ignoredReason: params.reason ?? 
null, + ignoredByUserId: params.userId, + resolvedAt: null, + resolvedInVersion: null, + resolvedBy: null, + }; + + return this._prisma.errorGroupState.upsert({ + where, + update: data, + create: { + organizationId: identifier.organizationId, + projectId: identifier.projectId, + environmentId: identifier.environmentId, + taskIdentifier: identifier.taskIdentifier, + errorFingerprint: identifier.errorFingerprint, + ...data, + }, + }); + } + + async unresolveError(identifier: ErrorGroupIdentifier) { + const where = { + environmentId_taskIdentifier_errorFingerprint: { + environmentId: identifier.environmentId, + taskIdentifier: identifier.taskIdentifier, + errorFingerprint: identifier.errorFingerprint, + }, + }; + + return this._prisma.errorGroupState.upsert({ + where, + update: { + status: "UNRESOLVED", + resolvedAt: null, + resolvedInVersion: null, + resolvedBy: null, + ignoredUntil: null, + ignoredUntilOccurrenceRate: null, + ignoredUntilTotalOccurrences: null, + ignoredAt: null, + ignoredReason: null, + ignoredByUserId: null, + }, + create: { + organizationId: identifier.organizationId, + projectId: identifier.projectId, + environmentId: identifier.environmentId, + taskIdentifier: identifier.taskIdentifier, + errorFingerprint: identifier.errorFingerprint, + status: "UNRESOLVED", + }, + }); + } +} diff --git a/internal-packages/clickhouse/src/errors.ts b/internal-packages/clickhouse/src/errors.ts index c93efbcaf1f..7ae4098c4d9 100644 --- a/internal-packages/clickhouse/src/errors.ts +++ b/internal-packages/clickhouse/src/errors.ts @@ -314,3 +314,148 @@ export function createErrorOccurrencesQueryBuilder( settings ); } + +export const ErrorOccurrencesByVersionQueryResult = z.object({ + error_fingerprint: z.string(), + task_version: z.string(), + bucket_epoch: z.number(), + count: z.number(), +}); + +export type ErrorOccurrencesByVersionQueryResult = z.infer< + typeof ErrorOccurrencesByVersionQueryResult +>; + +/** + * Creates a query builder for bucketed error 
occurrence counts grouped by task_version. + * Used for stacked-by-version activity charts on the error detail page. + */ +export function createErrorOccurrencesByVersionQueryBuilder( + ch: ClickhouseReader, + intervalExpr: string, + settings?: ClickHouseSettings +): ClickhouseQueryBuilder { + return new ClickhouseQueryBuilder( + "getErrorOccurrencesByVersion", + ` + SELECT + error_fingerprint, + task_version, + toUnixTimestamp(toStartOfInterval(minute, ${intervalExpr})) as bucket_epoch, + sum(count) as count + FROM trigger_dev.error_occurrences_v1 + `, + ch, + ErrorOccurrencesByVersionQueryResult, + settings + ); +} + +// --------------------------------------------------------------------------- +// Alert evaluator – active errors since a timestamp +// --------------------------------------------------------------------------- + +export const ActiveErrorsSinceQueryResult = z.object({ + environment_id: z.string(), + task_identifier: z.string(), + error_fingerprint: z.string(), + error_type: z.string(), + error_message: z.string(), + sample_stack_trace: z.string(), + first_seen: z.string(), + last_seen: z.string(), + occurrence_count: z.number(), +}); + +export type ActiveErrorsSinceQueryResult = z.infer; + +/** + * Query builder for fetching all errors active since a given timestamp. + * Returns errors with last_seen > scheduledAt, grouped by env/task/fingerprint. + * Used by the error alert evaluator to find new issues, regressions, and un-ignored errors. 
+ */ +export function getActiveErrorsSinceQueryBuilder( + ch: ClickhouseReader, + settings?: ClickHouseSettings +) { + return ch.queryBuilder({ + name: "getActiveErrorsSince", + baseQuery: ` + SELECT + environment_id, + task_identifier, + error_fingerprint, + any(error_type) as error_type, + any(error_message) as error_message, + any(sample_stack_trace) as sample_stack_trace, + toString(toUnixTimestamp64Milli(min(first_seen))) as first_seen, + toString(toUnixTimestamp64Milli(max(last_seen))) as last_seen, + toUInt64(sumMerge(occurrence_count)) as occurrence_count + FROM trigger_dev.errors_v1 + `, + schema: ActiveErrorsSinceQueryResult, + settings, + }); +} + +export const OccurrenceCountsSinceQueryResult = z.object({ + environment_id: z.string(), + task_identifier: z.string(), + error_fingerprint: z.string(), + occurrences_since: z.number(), +}); + +export type OccurrenceCountsSinceQueryResult = z.infer; + +/** + * Query builder for occurrence counts since a given timestamp, grouped by error. + * Used by the alert evaluator to check ignore thresholds. + */ +export function getOccurrenceCountsSinceQueryBuilder( + ch: ClickhouseReader, + settings?: ClickHouseSettings +) { + return ch.queryBuilder({ + name: "getOccurrenceCountsSince", + baseQuery: ` + SELECT + environment_id, + task_identifier, + error_fingerprint, + sum(count) as occurrences_since + FROM trigger_dev.error_occurrences_v1 + `, + schema: OccurrenceCountsSinceQueryResult, + settings, + }); +} + +// --------------------------------------------------------------------------- +// Alert evaluator helpers – occurrence rate & count since timestamp +// --------------------------------------------------------------------------- + +export const ErrorOccurrenceTotalCountResult = z.object({ + total_count: z.number(), +}); + +export type ErrorOccurrenceTotalCountResult = z.infer; + +/** + * Query builder for summing occurrences since a given timestamp. 
+ * Used by the alert evaluator to check total-count-based ignore thresholds. + */ +export function getOccurrenceCountSinceQueryBuilder( + ch: ClickhouseReader, + settings?: ClickHouseSettings +) { + return ch.queryBuilder({ + name: "getOccurrenceCountSince", + baseQuery: ` + SELECT + sum(count) as total_count + FROM trigger_dev.error_occurrences_v1 + `, + schema: ErrorOccurrenceTotalCountResult, + settings, + }); +} diff --git a/internal-packages/clickhouse/src/index.ts b/internal-packages/clickhouse/src/index.ts index b6fbd92177b..970a7386665 100644 --- a/internal-packages/clickhouse/src/index.ts +++ b/internal-packages/clickhouse/src/index.ts @@ -34,7 +34,11 @@ import { getErrorHourlyOccurrences, getErrorOccurrencesListQueryBuilder, createErrorOccurrencesQueryBuilder, + createErrorOccurrencesByVersionQueryBuilder, getErrorAffectedVersionsQueryBuilder, + getOccurrenceCountSinceQueryBuilder, + getActiveErrorsSinceQueryBuilder, + getOccurrenceCountsSinceQueryBuilder, } from "./errors.js"; export { msToClickHouseInterval } from "./intervals.js"; import { Logger, type LogLevel } from "@trigger.dev/core/logger"; @@ -251,6 +255,11 @@ export class ClickHouse { occurrencesListQueryBuilder: getErrorOccurrencesListQueryBuilder(this.reader), createOccurrencesQueryBuilder: (intervalExpr: string) => createErrorOccurrencesQueryBuilder(this.reader, intervalExpr), + createOccurrencesByVersionQueryBuilder: (intervalExpr: string) => + createErrorOccurrencesByVersionQueryBuilder(this.reader, intervalExpr), + occurrenceCountSinceQueryBuilder: getOccurrenceCountSinceQueryBuilder(this.reader), + activeErrorsSinceQueryBuilder: getActiveErrorsSinceQueryBuilder(this.reader), + occurrenceCountsSinceQueryBuilder: getOccurrenceCountsSinceQueryBuilder(this.reader), }; } } diff --git a/internal-packages/database/prisma/migrations/20260306102053_error_group_state/migration.sql b/internal-packages/database/prisma/migrations/20260306102053_error_group_state/migration.sql new file mode 100644 
index 00000000000..28595d98bf9 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260306102053_error_group_state/migration.sql @@ -0,0 +1,51 @@ +-- CreateEnum +CREATE TYPE "public"."ErrorGroupStatus" AS ENUM ('UNRESOLVED', 'RESOLVED', 'IGNORED'); + +-- AlterEnum +ALTER TYPE "public"."ProjectAlertType" ADD VALUE 'ERROR_GROUP'; + +-- CreateTable +CREATE TABLE + "public"."ErrorGroupState" ( + "id" TEXT NOT NULL, + "organizationId" TEXT NOT NULL, + "projectId" TEXT NOT NULL, + "environmentId" TEXT, + "taskIdentifier" TEXT NOT NULL, + "errorFingerprint" TEXT NOT NULL, + "status" "public"."ErrorGroupStatus" NOT NULL DEFAULT 'UNRESOLVED', + "ignoredUntil" TIMESTAMP(3), + "ignoredUntilOccurrenceRate" INTEGER, + "ignoredUntilTotalOccurrences" INTEGER, + "ignoredAt" TIMESTAMP(3), + "ignoredReason" TEXT, + "ignoredByUserId" TEXT, + "resolvedAt" TIMESTAMP(3), + "resolvedInVersion" TEXT, + "resolvedBy" TEXT, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + CONSTRAINT "ErrorGroupState_pkey" PRIMARY KEY ("id") + ); + +-- CreateIndex +CREATE INDEX "ErrorGroupState_status_idx" ON "public"."ErrorGroupState" ("status"); + +-- CreateIndex +CREATE INDEX "ErrorGroupState_ignoredUntil_idx" ON "public"."ErrorGroupState" ("ignoredUntil"); + +-- CreateIndex +CREATE UNIQUE INDEX "ErrorGroupState_environmentId_taskIdentifier_errorFingerpri_key" ON "public"."ErrorGroupState" ( + "environmentId", + "taskIdentifier", + "errorFingerprint" +); + +-- AddForeignKey +ALTER TABLE "public"."ErrorGroupState" ADD CONSTRAINT "ErrorGroupState_organizationId_fkey" FOREIGN KEY ("organizationId") REFERENCES "public"."Organization" ("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."ErrorGroupState" ADD CONSTRAINT "ErrorGroupState_projectId_fkey" FOREIGN KEY ("projectId") REFERENCES "public"."Project" ("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."ErrorGroupState" ADD 
CONSTRAINT "ErrorGroupState_environmentId_fkey" FOREIGN KEY ("environmentId") REFERENCES "public"."RuntimeEnvironment" ("id") ON DELETE CASCADE ON UPDATE CASCADE; \ No newline at end of file diff --git a/internal-packages/database/prisma/migrations/20260308181657_add_error_alert_config_to_project_alert_channel/migration.sql b/internal-packages/database/prisma/migrations/20260308181657_add_error_alert_config_to_project_alert_channel/migration.sql new file mode 100644 index 00000000000..09b0eacdca3 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260308181657_add_error_alert_config_to_project_alert_channel/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "public"."ProjectAlertChannel" ADD COLUMN "errorAlertConfig" JSONB; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index f6986be42c0..f164537099a 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -228,6 +228,7 @@ model Organization { githubAppInstallations GithubAppInstallation[] customerQueries CustomerQuery[] metricsDashboards MetricsDashboard[] + errorGroupStates ErrorGroupState[] } model OrgMember { @@ -346,6 +347,7 @@ model RuntimeEnvironment { waitpointTags WaitpointTag[] BulkActionGroup BulkActionGroup[] customerQueries CustomerQuery[] + errorGroupStates ErrorGroupState[] @@unique([projectId, slug, orgMemberId]) @@unique([projectId, shortcode]) @@ -417,6 +419,7 @@ model Project { onboardingData Json? 
taskScheduleInstances TaskScheduleInstance[] metricsDashboards MetricsDashboard[] + errorGroupStates ErrorGroupState[] } enum ProjectVersion { @@ -782,7 +785,6 @@ model TaskRun { /// Store the stream keys that are being used by the run realtimeStreams String[] @default([]) - @@unique([oneTimeUseToken]) @@unique([runtimeEnvironmentId, taskIdentifier, idempotencyKey]) // Finding child runs @@ -2018,6 +2020,8 @@ model ProjectAlertChannel { alertTypes ProjectAlertType[] environmentTypes RuntimeEnvironmentType[] @default([STAGING, PRODUCTION]) + errorAlertConfig Json? + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) projectId String @@ -2072,6 +2076,7 @@ enum ProjectAlertType { TASK_RUN_ATTEMPT DEPLOYMENT_FAILURE DEPLOYMENT_SUCCESS + ERROR_GROUP } enum ProjectAlertStatus { @@ -2577,3 +2582,80 @@ model MetricsDashboard { /// Fast lookup for the list @@index([projectId, createdAt(sort: Desc)]) } + +enum ErrorGroupStatus { + UNRESOLVED + RESOLVED + IGNORED +} + +/** + * Error group state is used to track when a user has interacted with an error (ignored/resolved) + * The actual error data is in ClickHouse. + */ +model ErrorGroupState { + id String @id @default(cuid()) + + organization Organization @relation(fields: [organizationId], references: [id], onDelete: Cascade, onUpdate: Cascade) + organizationId String + + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) + projectId String + + /** + * You can ignore/resolve an error across all environments, or specific ones + */ + environment RuntimeEnvironment? @relation(fields: [environmentId], references: [id], onDelete: Cascade, onUpdate: Cascade) + environmentId String? + + taskIdentifier String + errorFingerprint String + + status ErrorGroupStatus @default(UNRESOLVED) + + /** + * Error is ignored until this date + */ + ignoredUntil DateTime? 
+ /** + * Error is ignored until this occurrence rate + */ + ignoredUntilOccurrenceRate Int? + /** + * Error is ignored until this total occurrences + */ + ignoredUntilTotalOccurrences Int? + + /** + * Error was ignored at this date + */ + ignoredAt DateTime? + /** + * Reason for ignoring the error + */ + ignoredReason String? + /** + * User who ignored the error + */ + ignoredByUserId String? + + /** + * Error was resolved at this date + */ + resolvedAt DateTime? + /** + * Error was resolved in this version + */ + resolvedInVersion String? + /** + * User who resolved the error + */ + resolvedBy String? + + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + @@unique([environmentId, taskIdentifier, errorFingerprint]) + @@index([status]) + @@index([ignoredUntil]) +} diff --git a/internal-packages/emails/emails/alert-error-group.tsx b/internal-packages/emails/emails/alert-error-group.tsx new file mode 100644 index 00000000000..f584f06edba --- /dev/null +++ b/internal-packages/emails/emails/alert-error-group.tsx @@ -0,0 +1,114 @@ +import { + Body, + CodeBlock, + Container, + Head, + Html, + Link, + Preview, + Text, + dracula, +} from "@react-email/components"; +import { z } from "zod"; +import { Footer } from "./components/Footer"; +import { Image } from "./components/Image"; +import { anchor, container, h1, main, paragraphLight, paragraphTight } from "./components/styles"; +import React from "react"; + +export const AlertErrorGroupEmailSchema = z.object({ + email: z.literal("alert-error-group"), + classification: z.enum(["new_issue", "regression", "unignored"]), + taskIdentifier: z.string(), + environment: z.string(), + error: z.object({ + message: z.string(), + type: z.string().optional(), + stackTrace: z.string().optional(), + }), + occurrenceCount: z.number(), + errorLink: z.string().url(), + organization: z.string(), + project: z.string(), +}); + +type AlertErrorGroupEmailProps = z.infer; + +const classificationLabels: Record = { + new_issue: 
"New error", + regression: "Regression", + unignored: "Error resurfaced", +}; + +const previewDefaults: AlertErrorGroupEmailProps = { + email: "alert-error-group", + classification: "new_issue", + taskIdentifier: "my-task", + environment: "Production", + error: { + message: "Cannot read property 'foo' of undefined", + type: "TypeError", + stackTrace: "TypeError: Cannot read property 'foo' of undefined\n at Object.", + }, + occurrenceCount: 42, + errorLink: "https://trigger.dev", + organization: "my-organization", + project: "my-project", +}; + +export default function Email(props: AlertErrorGroupEmailProps) { + const { + classification, + taskIdentifier, + environment, + error, + occurrenceCount, + errorLink, + organization, + project, + } = { + ...previewDefaults, + ...props, + }; + + const label = classificationLabels[classification] ?? "Error alert"; + + return ( + + + + {`${organization}: [${label}] ${error.type ?? "Error"} in ${taskIdentifier} (${environment})`} + + + + + {label}: {error.type ?? "Error"} in {taskIdentifier} + + Organization: {organization} + Project: {project} + Task: {taskIdentifier} + Environment: {environment} + Occurrences: {occurrenceCount} + + {error.message} + {error.stackTrace && ( + + )} + + Investigate this error + + + Trigger.dev +