diff --git a/src/Exceptionless.Core/Jobs/CleanupDataJob.cs b/src/Exceptionless.Core/Jobs/CleanupDataJob.cs index 01795f0e2d..a7a94b811a 100644 --- a/src/Exceptionless.Core/Jobs/CleanupDataJob.cs +++ b/src/Exceptionless.Core/Jobs/CleanupDataJob.cs @@ -72,7 +72,7 @@ ILoggerFactory loggerFactory protected override Task GetLockAsync(CancellationToken cancellationToken = default) { - return _lockProvider.TryAcquireAsync(nameof(CleanupDataJob), TimeSpan.FromMinutes(15), cancellationToken); + return _lockProvider.TryAcquireAsync(nameof(CleanupDataJob), TimeSpan.FromHours(2), cancellationToken); } protected override async Task RunInternalAsync(JobContext context) @@ -114,8 +114,13 @@ private async Task CleanupSoftDeletedOrganizationsAsync(JobContext context) while (organizationResults.Documents.Count > 0 && !context.CancellationToken.IsCancellationRequested) { + await RenewLockAsync(context); + foreach (var organization in organizationResults.Documents) { + if (context.CancellationToken.IsCancellationRequested) + break; + using var _ = _logger.BeginScope(new ExceptionlessState().Organization(organization.Id)); try { @@ -142,8 +147,13 @@ private async Task CleanupSoftDeletedProjectsAsync(JobContext context) while (projectResults.Documents.Count > 0 && !context.CancellationToken.IsCancellationRequested) { + await RenewLockAsync(context); + foreach (var project in projectResults.Documents) { + if (context.CancellationToken.IsCancellationRequested) + break; + using var _ = _logger.BeginScope(new ExceptionlessState().Organization(project.OrganizationId).Project(project.Id)); try { @@ -278,8 +288,13 @@ private async Task EnforceRetentionAsync(JobContext context) var results = await _organizationRepository.FindAsync(q => q.Include(o => o.Id, o => o.Name, o => o.RetentionDays), o => o.SearchAfterPaging().PageLimit(100)); while (results.Documents.Count > 0 && !context.CancellationToken.IsCancellationRequested) { + await RenewLockAsync(context); + foreach (var organization in results.Documents) { + if (context.CancellationToken.IsCancellationRequested) + break; + using var _ = _logger.BeginScope(new ExceptionlessState().Organization(organization.Id)); int retentionDays = _billingManager.GetBillingPlanByUpsellingRetentionPeriod(organization.RetentionDays)?.RetentionDays ?? _appOptions.MaximumRetentionDays; @@ -349,6 +364,8 @@ private async Task EnforceEventRetentionDaysAsync(Organization organization, int private Task RenewLockAsync(JobContext context) { + // Called at each page boundary to prevent the distributed lock from expiring + // during long-running bulk cleanup operations that span multiple pages. _lastRun = _timeProvider.GetUtcNow().UtcDateTime; return context.RenewLockAsync(); } diff --git a/src/Exceptionless.Core/Jobs/CleanupOrphanedDataJob.cs b/src/Exceptionless.Core/Jobs/CleanupOrphanedDataJob.cs index 37a67ae24a..875702fb02 100644 --- a/src/Exceptionless.Core/Jobs/CleanupOrphanedDataJob.cs +++ b/src/Exceptionless.Core/Jobs/CleanupOrphanedDataJob.cs @@ -1,51 +1,42 @@ -using Elastic.Clients.Elasticsearch; -using Elastic.Clients.Elasticsearch.Aggregations; -using Elastic.Clients.Elasticsearch.Core.ReindexRethrottle; -using Elastic.Clients.Elasticsearch.QueryDsl; using Exceptionless.Core.Models; using Exceptionless.Core.Repositories; -using Exceptionless.Core.Repositories.Configuration; using Foundatio.Caching; using Foundatio.Jobs; using Foundatio.Lock; using Foundatio.Repositories; -using Foundatio.Repositories.Elasticsearch.Extensions; -using Foundatio.Repositories.Elasticsearch.Utility; -using Foundatio.Repositories.Extensions; using Foundatio.Repositories.Models; using Foundatio.Resilience; using Microsoft.Extensions.Diagnostics.HealthChecks; using Microsoft.Extensions.Logging; -using LogLevel = Microsoft.Extensions.Logging.LogLevel; namespace Exceptionless.Core.Jobs; [Job(Description = "Deletes orphaned data.", IsContinuous = false)] public class CleanupOrphanedDataJob : JobWithLockBase, IHealthCheck { - private readonly ExceptionlessElasticConfiguration _config; - private readonly ElasticsearchClient _elasticClient; - private readonly IStackRepository _stackRepository; - private readonly IProjectRepository _projectRepository; private readonly IOrganizationRepository _organizationRepository; + private readonly IProjectRepository _projectRepository; + private readonly IStackRepository _stackRepository; private readonly IEventRepository _eventRepository; private readonly ICacheClient _cacheClient; private readonly ILockProvider _lockProvider; private DateTime? _lastRun; - public CleanupOrphanedDataJob(ExceptionlessElasticConfiguration config, IStackRepository stackRepository, - IProjectRepository projectRepository, IOrganizationRepository organizationRepository, - IEventRepository eventRepository, ICacheClient cacheClient, ILockProvider lockProvider, + public CleanupOrphanedDataJob( + IOrganizationRepository organizationRepository, + IProjectRepository projectRepository, + IStackRepository stackRepository, + IEventRepository eventRepository, + ICacheClient cacheClient, + ILockProvider lockProvider, TimeProvider timeProvider, IResiliencePolicyProvider resiliencePolicyProvider, ILoggerFactory loggerFactory ) : base(timeProvider, resiliencePolicyProvider, loggerFactory) { - _config = config; - _elasticClient = config.Client; - _stackRepository = stackRepository; - _projectRepository = projectRepository; _organizationRepository = organizationRepository; + _projectRepository = projectRepository; + _stackRepository = stackRepository; _eventRepository = eventRepository; _cacheClient = cacheClient; _lockProvider = lockProvider; @@ -58,6 +49,8 @@ ILoggerFactory loggerFactory protected override async Task RunInternalAsync(JobContext context) { + _lastRun = _timeProvider.GetUtcNow().UtcDateTime; + await DeleteOrphanedEventsByStackAsync(context); await DeleteOrphanedEventsByProjectAsync(context); await DeleteOrphanedEventsByOrganizationAsync(context); @@ -69,200 +62,148 @@ protected override async Task RunInternalAsync(JobContext context) public async Task DeleteOrphanedEventsByStackAsync(JobContext context) { - // get approximate number of unique stack ids - var stackCardinality = await _elasticClient.SearchAsync(s => s - .Indices(GetEventIndexPattern()) - .Size(0) - .AddAggregation("cardinality_stack_id", a => a.Cardinality(c => c.Field(f => f.StackId).PrecisionThreshold(40000)))); - - double? uniqueStackIdCount = stackCardinality.Aggregations?.GetCardinality("cardinality_stack_id")?.Value; - if (!uniqueStackIdCount.HasValue || uniqueStackIdCount.Value <= 0) - return; - - // break into batches of 500 - const int batchSize = 500; - int buckets = (int)uniqueStackIdCount.Value / batchSize; - buckets = Math.Max(1, buckets); - int totalOrphanedEventCount = 0; - int totalStackIds = 0; - - for (int batchNumber = 0; batchNumber < buckets; batchNumber++) + _logger.LogInformation("Starting orphaned events cleanup by stack"); + long totalOrphanedEvents = 0; + long totalStackIds = 0; + var afterKey = new CompositeKeyResult(); + bool hasMore = true; + + while (hasMore && !context.CancellationToken.IsCancellationRequested) { await RenewLockAsync(context); - var stackIdTerms = await _elasticClient.SearchAsync(s => s - .Indices(GetEventIndexPattern()) - .Size(0) - .AddAggregation("terms_stack_id", a => a.Terms(c => c.Field(f => f.StackId).Include(new TermsInclude(batchNumber, buckets)).Size(batchSize * 2)))); - - string[] stackIds = stackIdTerms.Aggregations?.GetStringTerms("terms_stack_id")?.Buckets.Select(b => b.Key.ToString()!).ToArray() ?? []; - if (stackIds.Length == 0) - continue; + var stackIds = await _eventRepository.GetDistinctStackIdsAsync(500, afterKey); + if (stackIds.Count == 0) + break; - totalStackIds += stackIds.Length; + hasMore = stackIds.Count == 500; + totalStackIds += stackIds.Count; - var stacks = await _stackRepository.GetByIdsAsync(stackIds, o => o.ImmediateConsistency()); - var foundStackIds = stacks.Select(stack => stack.Id).ToHashSet(StringComparer.OrdinalIgnoreCase); - string[] missingStackIds = stackIds.Where(stackId => !foundStackIds.Contains(stackId)).ToArray(); + var existingStacks = await _stackRepository.GetByIdsAsync(stackIds.ToArray(), o => o.Include(s => s.Id, s => s.IsDeleted)); + var existingStackIds = existingStacks.Select(s => s.Id).ToHashSet(StringComparer.OrdinalIgnoreCase); + string[] missingStackIds = stackIds.Where(id => !existingStackIds.Contains(id)).ToArray(); if (missingStackIds.Length == 0) - { - _logger.LogInformation("{BatchNumber}/{BatchCount}: Did not find any missing stacks out of {StackIdCount}", batchNumber, buckets, stackIds.Length); continue; - } - totalOrphanedEventCount += missingStackIds.Length; - _logger.LogInformation("{BatchNumber}/{BatchCount}: Found {OrphanedEventCount} orphaned events from missing stacks {MissingStackIds} out of {StackIdCount}", batchNumber, buckets, missingStackIds.Length, missingStackIds, stackIds.Length); - await _elasticClient.DeleteByQueryAsync(r => r - .Indices(GetEventIndexPattern()) - .Query(q => q.Terms(t => t.Field(f => f.StackId).Terms(new TermsQueryField(missingStackIds.Select(FieldValueHelper.ToFieldValue).ToList()))))); + long deletedCount = await _eventRepository.RemoveAllByStackIdsAsync(missingStackIds); + totalOrphanedEvents += deletedCount; + + _logger.LogInformation("Deleted {DeletedCount} orphaned events from {MissingStackCount} missing stacks out of {StackIdCount} checked", deletedCount, missingStackIds.Length, stackIds.Count); } - _logger.LogInformation("Found {OrphanedEventCount} orphaned events from missing stacks out of {StackIdCount}", totalOrphanedEventCount, totalStackIds); + _logger.LogInformation("Completed orphaned events cleanup by stack: deleted {TotalOrphanedEvents} events, checked {TotalStackIds} stacks", totalOrphanedEvents, totalStackIds); } public async Task DeleteOrphanedEventsByProjectAsync(JobContext context) { - // get approximate number of unique project ids - var projectCardinality = await _elasticClient.SearchAsync(s => s - .Indices(GetEventIndexPattern()) - .Size(0) - .AddAggregation("cardinality_project_id", a => a.Cardinality(c => c.Field(f => f.ProjectId).PrecisionThreshold(40000)))); - - double? uniqueProjectIdCount = projectCardinality.Aggregations?.GetCardinality("cardinality_project_id")?.Value; - if (!uniqueProjectIdCount.HasValue || uniqueProjectIdCount.Value <= 0) - return; - - // break into batches of 500 - const int batchSize = 500; - int buckets = (int)uniqueProjectIdCount.Value / batchSize; - buckets = Math.Max(1, buckets); - int totalOrphanedEventCount = 0; - int totalProjectIds = 0; - - for (int batchNumber = 0; batchNumber < buckets; batchNumber++) + _logger.LogInformation("Starting orphaned events cleanup by project"); + long totalOrphanedEvents = 0; + long totalProjectIds = 0; + var afterKey = new CompositeKeyResult(); + bool hasMore = true; + + while (hasMore && !context.CancellationToken.IsCancellationRequested) { await RenewLockAsync(context); - var projectIdTerms = await _elasticClient.SearchAsync(s => s - .Indices(GetEventIndexPattern()) - .Size(0) - .AddAggregation("terms_project_id", a => a.Terms(c => c.Field(f => f.ProjectId).Include(new TermsInclude(batchNumber, buckets)).Size(batchSize * 2)))); + var projectIds = await _eventRepository.GetDistinctProjectIdsAsync(500, afterKey); + if (projectIds.Count == 0) + break; - string[] projectIds = projectIdTerms.Aggregations?.GetStringTerms("terms_project_id")?.Buckets.Select(b => b.Key.ToString()!).ToArray() ?? []; - if (projectIds.Length == 0) - continue; - - totalProjectIds += projectIds.Length; + hasMore = projectIds.Count == 500; + totalProjectIds += projectIds.Count; - var projects = await _projectRepository.GetByIdsAsync(projectIds, o => o.ImmediateConsistency()); - var foundProjectIds = projects.Select(project => project.Id).ToHashSet(StringComparer.OrdinalIgnoreCase); - string[] missingProjectIds = projectIds.Where(projectId => !foundProjectIds.Contains(projectId)).ToArray(); + var existingProjects = await _projectRepository.GetByIdsAsync(projectIds.ToArray(), o => o.Include(p => p.Id, p => p.IsDeleted)); + var existingProjectIds = existingProjects.Select(p => p.Id).ToHashSet(StringComparer.OrdinalIgnoreCase); + string[] missingProjectIds = projectIds.Where(id => !existingProjectIds.Contains(id)).ToArray(); if (missingProjectIds.Length == 0) - { - _logger.LogInformation("{BatchNumber}/{BatchCount}: Did not find any missing projects out of {ProjectIdCount}", batchNumber, buckets, projectIds.Length); continue; - } - totalOrphanedEventCount += missingProjectIds.Length; - _logger.LogInformation("{BatchNumber}/{BatchCount}: Found {OrphanedEventCount} orphaned events from missing projects {MissingProjectIds} out of {ProjectIdCount}", batchNumber, buckets, missingProjectIds.Length, missingProjectIds, projectIds.Length); - await _elasticClient.DeleteByQueryAsync(r => r - .Indices(GetEventIndexPattern()) - .Query(q => q.Terms(t => t.Field(f => f.ProjectId).Terms(new TermsQueryField(missingProjectIds.Select(FieldValueHelper.ToFieldValue).ToList()))))); + long deletedCount = await _eventRepository.RemoveAllByProjectIdsAsync(missingProjectIds); + totalOrphanedEvents += deletedCount; + + _logger.LogInformation("Deleted {DeletedCount} orphaned events from {MissingProjectCount} missing projects out of {ProjectIdCount} checked", deletedCount, missingProjectIds.Length, projectIds.Count); } - _logger.LogInformation("Found {OrphanedEventCount} orphaned events from missing projects out of {ProjectIdCount}", totalOrphanedEventCount, totalProjectIds); + _logger.LogInformation("Completed orphaned events cleanup by project: deleted {TotalOrphanedEvents} events, checked {TotalProjectIds} projects", totalOrphanedEvents, totalProjectIds); } public async Task DeleteOrphanedEventsByOrganizationAsync(JobContext context) { - // get approximate number of unique organization ids - var organizationCardinality = await _elasticClient.SearchAsync(s => s - .Indices(GetEventIndexPattern()) - .Size(0) - .AddAggregation("cardinality_organization_id", a => a.Cardinality(c => c.Field(f => f.OrganizationId).PrecisionThreshold(40000)))); - - double? uniqueOrganizationIdCount = organizationCardinality.Aggregations?.GetCardinality("cardinality_organization_id")?.Value; - if (!uniqueOrganizationIdCount.HasValue || uniqueOrganizationIdCount.Value <= 0) - return; - - // break into batches of 500 - const int batchSize = 500; - int buckets = (int)uniqueOrganizationIdCount.Value / batchSize; - buckets = Math.Max(1, buckets); - int totalOrphanedEventCount = 0; - int totalOrganizationIds = 0; - - for (int batchNumber = 0; batchNumber < buckets; batchNumber++) + _logger.LogInformation("Starting orphaned events cleanup by organization"); + long totalOrphanedEvents = 0; + long totalOrganizationIds = 0; + var afterKey = new CompositeKeyResult(); + bool hasMore = true; + + while (hasMore && !context.CancellationToken.IsCancellationRequested) { await RenewLockAsync(context); - var organizationIdTerms = await _elasticClient.SearchAsync(s => s - .Indices(GetEventIndexPattern()) - .Size(0) - .AddAggregation("terms_organization_id", a => a.Terms(c => c.Field(f => f.OrganizationId).Include(new TermsInclude(batchNumber, buckets)).Size(batchSize * 2)))); + var organizationIds = await _eventRepository.GetDistinctOrganizationIdsAsync(500, afterKey); + if (organizationIds.Count == 0) + break; - string[] organizationIds = organizationIdTerms.Aggregations?.GetStringTerms("terms_organization_id")?.Buckets.Select(b => b.Key.ToString()!).ToArray() ?? []; - if (organizationIds.Length == 0) - continue; - - totalOrganizationIds += organizationIds.Length; + hasMore = organizationIds.Count == 500; + totalOrganizationIds += organizationIds.Count; - var organizations = await _organizationRepository.GetByIdsAsync(organizationIds, o => o.ImmediateConsistency()); - var foundOrganizationIds = organizations.Select(organization => organization.Id).ToHashSet(StringComparer.OrdinalIgnoreCase); - string[] missingOrganizationIds = organizationIds.Where(organizationId => !foundOrganizationIds.Contains(organizationId)).ToArray(); + var existingOrganizations = await _organizationRepository.GetByIdsAsync(organizationIds.ToArray(), o => o.Include(organization => organization.Id, organization => organization.IsDeleted)); + var existingOrganizationIds = existingOrganizations.Select(organization => organization.Id).ToHashSet(StringComparer.OrdinalIgnoreCase); + string[] missingOrganizationIds = organizationIds.Where(id => !existingOrganizationIds.Contains(id)).ToArray(); if (missingOrganizationIds.Length == 0) - { - _logger.LogInformation("{BatchNumber}/{BatchCount}: Did not find any missing organizations out of {OrganizationIdCount}", batchNumber, buckets, organizationIds.Length); continue; - } - totalOrphanedEventCount += missingOrganizationIds.Length; - _logger.LogInformation("{BatchNumber}/{BatchCount}: Found {OrphanedEventCount} orphaned events from missing organizations {MissingOrganizationIds} out of {OrganizationIdCount}", batchNumber, buckets, missingOrganizationIds.Length, missingOrganizationIds, organizationIds.Length); - await _elasticClient.DeleteByQueryAsync(r => r - .Indices(GetEventIndexPattern()) - .Query(q => q.Terms(t => t.Field(f => f.OrganizationId).Terms(new TermsQueryField(missingOrganizationIds.Select(FieldValueHelper.ToFieldValue).ToList()))))); + long deletedCount = await _eventRepository.RemoveAllByOrganizationIdsAsync(missingOrganizationIds); + totalOrphanedEvents += deletedCount; + + _logger.LogInformation("Deleted {DeletedCount} orphaned events from {MissingOrganizationCount} missing organizations out of {OrganizationIdCount} checked", deletedCount, missingOrganizationIds.Length, organizationIds.Count); } - _logger.LogInformation("Found {OrphanedEventCount} orphaned events from missing organizations out of {OrganizationIdCount}", totalOrphanedEventCount, totalOrganizationIds); + _logger.LogInformation("Completed orphaned events cleanup by organization: deleted {TotalOrphanedEvents} events, checked {TotalOrganizationIds} organizations", totalOrphanedEvents, totalOrganizationIds); } public async Task FixDuplicateStacks(JobContext context) { _logger.LogInformation("Getting duplicate stacks"); - var duplicateStackAgg = await _elasticClient.SearchAsync(q => q - .Indices(_config.Stacks.VersionedName) - .Query(q => q.QueryString(qs => qs.Query("is_deleted:false"))) - .Size(0) - .AddAggregation("stacks", a => a.Terms(t => t.Field(f => f.DuplicateSignature).MinDocCount(2).Size(10000)))); - _logger.LogRequest(duplicateStackAgg, LogLevel.Trace); - - var buckets = duplicateStackAgg.Aggregations?.GetStringTerms("stacks")?.Buckets.ToList() ?? []; - int total = buckets.Count; + int total = 0; int processed = 0; int error = 0; long totalUpdatedEventCount = 0; var lastStatus = _timeProvider.GetUtcNow().UtcDateTime; - int batch = 1; + int batch = 0; - while (buckets.Count > 0) + // Loop until no more duplicate signatures exist. Each iteration forces an index refresh + // (via ImmediateConsistency on GetDuplicateSignaturesAsync) so soft-deleted stacks are + // excluded from subsequent aggregation calls, preventing re-processing. + while (!context.CancellationToken.IsCancellationRequested) { - _logger.LogInformation($"Found {buckets.Count} duplicate stacks in batch #{batch}."); + var duplicateSignatures = await _stackRepository.GetDuplicateSignaturesAsync(); + if (duplicateSignatures.Count == 0) + break; + + batch++; + total += duplicateSignatures.Count; + _logger.LogInformation("Found {Total} duplicate stacks in batch #{Batch}", duplicateSignatures.Count, batch); await RenewLockAsync(context); - foreach (var duplicateSignature in buckets) + int batchProcessed = 0; + foreach (var duplicateSignature in duplicateSignatures) { + if (context.CancellationToken.IsCancellationRequested) + break; + string? projectId = null; string? signature = null; try { - string[] parts = duplicateSignature.Key.ToString().Split(':'); + string[] parts = duplicateSignature.Split(':'); if (parts.Length != 2) { - _logger.LogError("Error parsing duplicate signature {DuplicateSignature}", duplicateSignature.Key.ToString()); + _logger.LogError("Error parsing duplicate signature {DuplicateSignature}", duplicateSignature); continue; } projectId = parts[0]; @@ -276,16 +217,16 @@ public async Task FixDuplicateStacks(JobContext context) } var eventCounts = await _eventRepository.CountAsync(q => q.Stack(stacks.Documents.Select(s => s.Id)).AggregationsExpression("terms:stack_id")); - var eventCountBuckets = eventCounts.Aggregations.Terms("terms_stack_id")?.Buckets ?? new List>(); + var eventCountBuckets = eventCounts.Aggregations.Terms("terms_stack_id")?.Buckets ?? new List>(); - // we only need to update events if more than one stack has events associated to it + // We only need to update events if more than one stack has events associated to it. bool shouldUpdateEvents = eventCountBuckets.Count > 1; - // default to using the oldest stack + // Default to using the oldest stack. var targetStack = stacks.Documents.OrderBy(s => s.CreatedUtc).First(); var duplicateStacks = stacks.Documents.OrderBy(s => s.CreatedUtc).Skip(1).ToList(); - // use the stack that has the most events on it so we can reduce the number of updates + // Use the stack that has the most events on it so we can reduce the number of updates. if (eventCountBuckets.Count > 0) { string targetStackId = eventCountBuckets.OrderByDescending(b => b.Total).First().Key; @@ -297,102 +238,65 @@ public async Task FixDuplicateStacks(JobContext context) targetStack.Status = stacks.Documents.FirstOrDefault(d => d.Status != StackStatus.Open)?.Status ?? StackStatus.Open; targetStack.LastOccurrence = stacks.Documents.Max(d => d.LastOccurrence); targetStack.SnoozeUntilUtc = stacks.Documents.Max(d => d.SnoozeUntilUtc); - targetStack.DateFixed = stacks.Documents.Max(d => d.DateFixed); ; + targetStack.DateFixed = stacks.Documents.Max(d => d.DateFixed); targetStack.TotalOccurrences += duplicateStacks.Sum(d => d.TotalOccurrences); - targetStack.Tags.AddRange(duplicateStacks.SelectMany(d => d.Tags)); + targetStack.Tags.UnionWith(duplicateStacks.SelectMany(d => d.Tags)); targetStack.References = stacks.Documents.SelectMany(d => d.References).Distinct().ToList(); targetStack.OccurrencesAreCritical = stacks.Documents.Any(d => d.OccurrencesAreCritical); duplicateStacks.ForEach(s => s.IsDeleted = true); - await _stackRepository.SaveAsync(duplicateStacks); - await _stackRepository.SaveAsync(targetStack); - processed++; - - long eventsToMove = eventCountBuckets.Where(b => b.Key != targetStack.Id).Sum(b => b.Total) ?? 0; - _logger.LogInformation("De-duped stack: Target={TargetId} Events={EventCount} Dupes={DuplicateIds} HasEvents={HasEvents}", targetStack.Id, eventsToMove, duplicateStacks.Select(s => s.Id), shouldUpdateEvents); if (shouldUpdateEvents) { - var response = await _elasticClient.UpdateByQueryAsync(u => u - .Indices(GetEventIndexPattern()) - .Query(q => q.Bool(b => b.Must(m => m - .Terms(t => t.Field(f => f.StackId).Terms(new TermsQueryField(duplicateStacks.Select(s => FieldValueHelper.ToFieldValue(s.Id)).ToList()))) - ))) - .Script(s => s.Source($"ctx._source.stack_id = '{targetStack.Id}'").Lang(ScriptLanguage.Painless)) - .Conflicts(Conflicts.Proceed) - .WaitForCompletion(false)); - _logger.LogRequest(response, LogLevel.Trace); - - var taskStartedTime = _timeProvider.GetUtcNow().UtcDateTime; - var taskId = response.Task; - int attempts = 0; - long affectedRecords = 0; - do - { - attempts++; - var taskStatus = await _elasticClient.Tasks.GetAsync(taskId!.FullyQualifiedId); - var status = taskStatus.Task.Status as ReindexStatus; - if (taskStatus.Completed) - { - // TODO: need to check to see if the task failed or completed successfully. Throw if it failed. - if (_timeProvider.GetUtcNow().UtcDateTime.Subtract(taskStartedTime) > TimeSpan.FromSeconds(30)) - _logger.LogInformation("Script operation task ({TaskId}) completed: Created: {Created} Updated: {Updated} Deleted: {Deleted} Conflicts: {Conflicts} Total: {Total}", taskId, status?.Created, status?.Updated, status?.Deleted, status?.VersionConflicts, status?.Total); - - affectedRecords += (status?.Created ?? 0) + (status?.Updated ?? 0) + (status?.Deleted ?? 0); - break; - } - - if (_timeProvider.GetUtcNow().UtcDateTime.Subtract(taskStartedTime) > TimeSpan.FromSeconds(30)) - { - await RenewLockAsync(context); - _logger.LogInformation("Checking script operation task ({TaskId}) status: Created: {Created} Updated: {Updated} Deleted: {Deleted} Conflicts: {Conflicts} Total: {Total}", taskId, status?.Created, status?.Updated, status?.Deleted, status?.VersionConflicts, status?.Total); - } - - var delay = TimeSpan.FromMilliseconds(50); - if (attempts > 20) - delay = TimeSpan.FromSeconds(5); - else if (attempts > 10) - delay = TimeSpan.FromSeconds(1); - else if (attempts > 5) - delay = TimeSpan.FromMilliseconds(250); - - await Task.Delay(delay, _timeProvider, context.CancellationToken); - } while (true); + // Reassign events before soft-deleting duplicates: if event reassignment + // fails, the duplicate stacks remain visible and no data is lost. + long affectedRecords = await _eventRepository.ReassignStackAsync( + duplicateStacks.Select(s => s.Id), targetStack.Id); _logger.LogInformation("Migrated stack events: Target={TargetId} Events={UpdatedEvents} Dupes={DuplicateIds}", targetStack.Id, affectedRecords, duplicateStacks.Select(s => s.Id)); - totalUpdatedEventCount += affectedRecords; } + // Soft-delete duplicates and save after events are safely migrated. + // No per-item ImmediateConsistency needed: GetDuplicateSignaturesAsync + // forces a refresh before each batch aggregation call. + await _stackRepository.SaveAsync([..duplicateStacks, targetStack]); + processed++; + batchProcessed++; + + long eventsToMove = eventCountBuckets.Where(b => b.Key != targetStack.Id).Sum(b => b.Total) ?? 0; + _logger.LogInformation("De-duped stack: Target={TargetId} Events={EventCount} Dupes={DuplicateIds} HasEvents={HasEvents}", targetStack.Id, eventsToMove, duplicateStacks.Select(s => s.Id), shouldUpdateEvents); + if (_timeProvider.GetUtcNow().UtcDateTime.Subtract(lastStatus) > TimeSpan.FromSeconds(5)) { lastStatus = _timeProvider.GetUtcNow().UtcDateTime; + await RenewLockAsync(context); _logger.LogInformation("Total={Processed}/{Total} Errors={ErrorCount}", processed, total, error); - await _cacheClient.RemoveByPrefixAsync(nameof(Stack)); } } + catch (OperationCanceledException) when (context.CancellationToken.IsCancellationRequested) + { + throw; + } catch (Exception ex) { + // Intentionally broad: log and continue processing other groups rather than + // aborting the entire job for a single corrupt or transiently failing signature. error++; - _logger.LogError(ex, "Error fixing duplicate stack {ProjectId} {SignatureHash}", projectId, signature); + _logger.LogError(ex, "Error fixing duplicate stack {ProjectId} {SignatureHash}: {Message}", projectId, signature, ex.Message); } } - await _elasticClient.Indices.RefreshAsync(_config.Stacks.VersionedName); - duplicateStackAgg = await _elasticClient.SearchAsync(q => q - .Indices(_config.Stacks.VersionedName) - .Query(q => q.QueryString(qs => qs.Query("is_deleted:false"))) - .Size(0) - .AddAggregation("stacks", a => a.Terms(t => t.Field(f => f.DuplicateSignature).MinDocCount(2).Size(10000)))); - _logger.LogRequest(duplicateStackAgg, LogLevel.Trace); - - buckets = duplicateStackAgg.Aggregations?.GetStringTerms("stacks")?.Buckets.ToList() ?? []; - total += buckets.Count; - batch++; - - _logger.LogInformation("Done de-duping stacks: Total={Processed}/{Total} Errors={ErrorCount}", processed, total, error); + _logger.LogInformation("Batch #{Batch} complete: Processed={BatchProcessed} Total={Processed}/{Total} Errors={ErrorCount} UpdatedEvents={UpdatedEventCount}", batch, batchProcessed, processed, total, error, totalUpdatedEventCount); await _cacheClient.RemoveByPrefixAsync(nameof(Stack)); + + // If nothing was processed this batch (all errors), stop to avoid an infinite loop + // where the same failing signatures are retried indefinitely. + if (batchProcessed == 0) + break; } + + _logger.LogInformation("Done de-duping stacks: Total={Processed}/{Total} Errors={ErrorCount} UpdatedEvents={UpdatedEventCount}", processed, total, error, totalUpdatedEventCount); } private Task RenewLockAsync(JobContext context) @@ -401,11 +305,6 @@ private Task RenewLockAsync(JobContext context) return context.RenewLockAsync(); } - private string GetEventIndexPattern() - { - return $"{_config.Events.VersionedName}-*"; - } - public Task CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default) { if (!_lastRun.HasValue) diff --git a/src/Exceptionless.Core/Repositories/EventRepository.cs b/src/Exceptionless.Core/Repositories/EventRepository.cs index 3fba37a69d..659268aa9f 100644 --- a/src/Exceptionless.Core/Repositories/EventRepository.cs +++ b/src/Exceptionless.Core/Repositories/EventRepository.cs @@ -1,21 +1,30 @@ -using Elastic.Clients.Elasticsearch.QueryDsl; +using System.Linq.Expressions; +using Elastic.Clients.Elasticsearch; +using Elastic.Clients.Elasticsearch.Aggregations; +using Elastic.Clients.Elasticsearch.QueryDsl; +using Elastic.Transport; +using Elastic.Transport.Products.Elasticsearch; using Exceptionless.Core.Models; using Exceptionless.Core.Repositories.Configuration; using Exceptionless.Core.Repositories.Queries; using Exceptionless.Core.Validation; using Exceptionless.DateTimeExtensions; using Foundatio.Repositories; +using Foundatio.Repositories.Elasticsearch.Extensions; +using Foundatio.Repositories.Exceptions; using Foundatio.Repositories.Models; namespace Exceptionless.Core.Repositories; public class EventRepository : RepositoryOwnedByOrganizationAndProject, IEventRepository { + private readonly ExceptionlessElasticConfiguration _configuration; private readonly TimeProvider _timeProvider; public EventRepository(ExceptionlessElasticConfiguration configuration, AppOptions options, MiniValidationValidator validator) : base(configuration.Events, validator, options) { + _configuration = configuration; _timeProvider = configuration.TimeProvider; DisableCache(); // NOTE: If cache is ever enabled, then fast paths for patching/deleting with scripts will be super slow! @@ -74,7 +83,7 @@ public Task RemoveAllAsync(string organizationId, string? clientIpAddress, if (!String.IsNullOrEmpty(clientIpAddress)) query = query.FieldEquals(EventIndex.Alias.IpAddress, clientIpAddress); - return RemoveAllAsync(q => query, options); + return RemoveAllIgnoringMissingEventIndexesAsync(q => query, options); } public Task> GetByReferenceIdAsync(string projectId, string referenceId) @@ -188,12 +197,180 @@ public override Task> GetByProjectIdAsync(string pr return FindAsync(q => q.Project(projectId).SortDescending(e => e.Date).SortDescending(e => e.Id), options); } + public override Task RemoveAllByOrganizationIdAsync(string organizationId) + { + ArgumentException.ThrowIfNullOrEmpty(organizationId); + + return RemoveAllIgnoringMissingEventIndexesAsync(q => q.Organization(organizationId)); + } + + public override Task RemoveAllByProjectIdAsync(string organizationId, string projectId) + { + ArgumentException.ThrowIfNullOrEmpty(organizationId); + ArgumentException.ThrowIfNullOrEmpty(projectId); + + return RemoveAllIgnoringMissingEventIndexesAsync(q => q.Organization(organizationId).Project(projectId)); + } + public Task RemoveAllByStackIdsAsync(string[] stackIds) { ArgumentNullException.ThrowIfNull(stackIds); - if (stackIds.Length == 0) + if (stackIds is []) throw new ArgumentOutOfRangeException(nameof(stackIds)); - return RemoveAllAsync(q => q.Stack(stackIds)); + return RemoveAllIgnoringMissingEventIndexesAsync(q => q.Stack(stackIds)); + } + + public Task RemoveAllByProjectIdsAsync(string[] projectIds) + { + ArgumentNullException.ThrowIfNull(projectIds); + if (projectIds is []) + throw new ArgumentOutOfRangeException(nameof(projectIds)); + + return RemoveAllIgnoringMissingEventIndexesAsync(q => q.Project(projectIds)); + } + + public Task RemoveAllByOrganizationIdsAsync(string[] organizationIds) + { + ArgumentNullException.ThrowIfNull(organizationIds); + if (organizationIds is []) + throw new ArgumentOutOfRangeException(nameof(organizationIds)); + + return RemoveAllIgnoringMissingEventIndexesAsync(q => q.Organization(organizationIds)); + } + + private async Task RemoveAllIgnoringMissingEventIndexesAsync( + RepositoryQueryDescriptor query, CommandOptionsDescriptor? options = null) + { + try + { + return await RemoveAllAsync(query, options); + } + catch (RepositoryException ex) when (IsIndexNotFound(ex.InnerException as TransportException)) + { + return 0; + } + catch (TransportException ex) when (IsIndexNotFound(ex)) + { + return 0; + } + } + + private static bool IsIndexNotFound(TransportException? ex) + { + if (ex?.ApiCallDetails?.HttpStatusCode != 404) + return false; + + return ex.ApiCallDetails.ProductError is ElasticsearchServerError serverError + ? IsIndexNotFound(serverError) + : ex.DebugInformation.Contains("index_not_found_exception", StringComparison.Ordinal); + } + + private static bool IsIndexNotFound(ElasticsearchServerError serverError) + { + if (serverError.Status != 404 || serverError.Error is null) + return false; + + return String.Equals(serverError.Error.Type, "index_not_found_exception", StringComparison.Ordinal) + || serverError.Error.RootCause?.Any(IsIndexNotFound) == true; + } + + private static bool IsIndexNotFound(Elastic.Transport.Products.Elasticsearch.ErrorCause? cause) + { + return cause is not null + && (String.Equals(cause.Type, "index_not_found_exception", StringComparison.Ordinal) + || cause.CausedBy is not null && IsIndexNotFound(cause.CausedBy)); + } + + /// + /// Reassigns all events from the source stacks to the target stack using a parameterized + /// Painless script (no string interpolation) to prevent script injection. + /// + public Task ReassignStackAsync(IEnumerable sourceStackIds, string targetStackId) + { + ArgumentNullException.ThrowIfNull(sourceStackIds); + ArgumentException.ThrowIfNullOrEmpty(targetStackId); + + // Materialize to avoid multiple enumeration and guard against empty; an empty + // .Stack() filter would match ALL events and reassign them to the target stack. + var sourceIds = sourceStackIds.ToList(); + if (sourceIds.Count == 0) + return Task.FromResult(0L); + + return PatchAllAsync( + q => q.Stack(sourceIds), + new ScriptPatch("ctx._source.stack_id = params.targetStackId") + { + Params = new Dictionary { ["targetStackId"] = targetStackId } + }); + } + + public Task> GetDistinctStackIdsAsync(int batchSize, CompositeKeyResult? afterKey = null) + { + return GetDistinctFieldValuesAsync("stack_id", e => e.StackId, batchSize, afterKey); + } + + public Task> GetDistinctProjectIdsAsync(int batchSize, CompositeKeyResult? afterKey = null) + { + return GetDistinctFieldValuesAsync("project_id", e => e.ProjectId, batchSize, afterKey); + } + + public Task> GetDistinctOrganizationIdsAsync(int batchSize, CompositeKeyResult? afterKey = null) + { + return GetDistinctFieldValuesAsync("organization_id", e => e.OrganizationId, batchSize, afterKey); + } + + /// + /// Uses a composite aggregation to paginate through all distinct values of a field. + /// Composite aggregations are preferred over terms aggregations for high-cardinality fields + /// because terms aggregations can silently miss values when the unique count exceeds the + /// configured size parameter. Composite aggregations guarantee correct iteration via an + /// after_key cursor, at the cost of requiring sequential page fetches. + /// + private async Task> GetDistinctFieldValuesAsync( + string fieldName, Expression> fieldExpression, int batchSize, CompositeKeyResult? afterKey) + { + var afterKeyValues = afterKey?.AfterKey; + string aggregationName = $"composite_{fieldName}"; + var sources = new List> + { + new(fieldName, new CompositeAggregationSource + { + Terms = new CompositeTermsAggregation { Field = fieldExpression } + }) + }; + + var search = await _configuration.Client.SearchAsync(s => + { + s.Size(0) + .AddAggregation(aggregationName, a => a.Composite(c => + { + c.Size(batchSize) + .Sources(sources); + + if (afterKeyValues is { Count: > 0 }) + c.After(afterKeyValues); + })); + }); + + var composite = search.Aggregations?.GetComposite(aggregationName); + + // Always clear the cursor first; repopulate only when a next page exists. + // This ensures callers that check afterKey.AfterKey.Count > 0 correctly + // detect end-of-pagination without requiring a final empty-result fetch. + if (afterKey is not null) + { + afterKey.AfterKey.Clear(); + if (composite?.AfterKey is not null) + { + foreach (var kvp in composite.AfterKey) + afterKey.AfterKey[kvp.Key] = kvp.Value; + } + } + + if (composite?.Buckets is not { Count: > 0 }) + return []; + + return composite.Buckets.Select(b => b.Key[fieldName].ToString()!).ToArray(); } } diff --git a/src/Exceptionless.Core/Repositories/Interfaces/IEventRepository.cs b/src/Exceptionless.Core/Repositories/Interfaces/IEventRepository.cs index c7c2272cbc..e68c19153d 100644 --- a/src/Exceptionless.Core/Repositories/Interfaces/IEventRepository.cs +++ b/src/Exceptionless.Core/Repositories/Interfaces/IEventRepository.cs @@ -1,4 +1,5 @@ -using Exceptionless.Core.Models; +using Elastic.Clients.Elasticsearch; +using Exceptionless.Core.Models; using Exceptionless.Core.Repositories.Queries; using Foundatio.Repositories; using Foundatio.Repositories.Models; @@ -13,6 +14,17 @@ public interface IEventRepository : IRepositoryOwnedByOrganizationAndProject UpdateSessionStartLastActivityAsync(string id, DateTime lastActivityUtc, bool isSessionEnd = false, bool hasError = false, bool sendNotifications = true); Task RemoveAllAsync(string organizationId, string? clientIpAddress, DateTime? utcStart, DateTime? utcEnd, CommandOptionsDescriptor? options = null); Task RemoveAllByStackIdsAsync(string[] stackIds); + Task RemoveAllByProjectIdsAsync(string[] projectIds); + Task RemoveAllByOrganizationIdsAsync(string[] organizationIds); + Task ReassignStackAsync(IEnumerable sourceStackIds, string targetStackId); + Task> GetDistinctStackIdsAsync(int batchSize, CompositeKeyResult? afterKey = null); + Task> GetDistinctProjectIdsAsync(int batchSize, CompositeKeyResult? afterKey = null); + Task> GetDistinctOrganizationIdsAsync(int batchSize, CompositeKeyResult? afterKey = null); +} + +public record CompositeKeyResult +{ + public Dictionary AfterKey { get; init; } = []; } public static class EventRepositoryExtensions diff --git a/src/Exceptionless.Core/Repositories/Interfaces/IStackRepository.cs b/src/Exceptionless.Core/Repositories/Interfaces/IStackRepository.cs index 13199d28c6..f2ec72337a 100644 --- a/src/Exceptionless.Core/Repositories/Interfaces/IStackRepository.cs +++ b/src/Exceptionless.Core/Repositories/Interfaces/IStackRepository.cs @@ -15,4 +15,5 @@ public interface IStackRepository : IRepositoryOwnedByOrganizationAndProject> GetStacksForCleanupAsync(string organizationId, DateTime cutoff); Task> GetSoftDeleted(); Task SoftDeleteByProjectIdAsync(string organizationId, string projectId); + Task> GetDuplicateSignaturesAsync(int maxResults = 10000); } diff --git a/src/Exceptionless.Core/Repositories/StackRepository.cs b/src/Exceptionless.Core/Repositories/StackRepository.cs index 29b0c3407e..adc0524d72 100644 --- a/src/Exceptionless.Core/Repositories/StackRepository.cs +++ b/src/Exceptionless.Core/Repositories/StackRepository.cs @@ -187,6 +187,22 @@ public Task SoftDeleteByProjectIdAsync(string organizationId, string proje ); } + public async Task> GetDuplicateSignaturesAsync(int maxResults = 10000) + { + // ImmediateConsistency forces a segment refresh before the aggregation so that + // any stacks soft-deleted in a previous batch are excluded here. Cost: one refresh + // per batch (not per item), equivalent to the original explicit index refresh. + var result = await CountAsync( + q => q.AggregationsExpression($"terms:(duplicate_signature~{maxResults} @min:2)"), + o => o.ImmediateConsistency()); + + var buckets = result.Aggregations.Terms("terms_duplicate_signature")?.Buckets; + if (buckets is not { Count: > 0 }) + return []; + + return buckets.Select(b => b.Key).ToArray(); + } + protected override async Task AddDocumentsToCacheAsync(ICollection> findHits, ICommandOptions options, bool isDirtyRead) { await base.AddDocumentsToCacheAsync(findHits, options, isDirtyRead); diff --git a/tests/Exceptionless.Tests/Jobs/CleanupDataJobTests.cs b/tests/Exceptionless.Tests/Jobs/CleanupDataJobTests.cs index fbf525beed..1de3042e77 100644 --- a/tests/Exceptionless.Tests/Jobs/CleanupDataJobTests.cs +++ b/tests/Exceptionless.Tests/Jobs/CleanupDataJobTests.cs @@ -52,7 +52,7 @@ public CleanupDataJobTests(ITestOutputHelper output, AppWebHostFactory factory) } [Fact] - public async Task CanCleanupSuspendedTokens() + public async Task RunAsync_SuspendedOrganization_SuspendsRelatedTokens() { var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); organization.IsSuspended = true; @@ -74,7 +74,7 @@ public async Task CanCleanupSuspendedTokens() } [Fact] - public async Task CanCleanupSoftDeletedOrganization() + public async Task RunAsync_SoftDeletedOrganization_RemovesAllRelatedData() { var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); organization.IsDeleted = true; @@ -97,7 +97,7 @@ public async Task CanCleanupSoftDeletedOrganization() } [Fact] - public async Task CanCleanupSoftDeletedProject() + public async Task RunAsync_SoftDeletedProject_RemovesProjectAndEvents() { var organization = await _organizationRepository.AddAsync(_organizationData.GenerateSampleOrganization(_billingManager, _plans), o => o.ImmediateConsistency()); @@ -117,7 +117,7 @@ public async Task CanCleanupSoftDeletedProject() } [Fact] - public async Task CanCleanupSoftDeletedStack() + public async Task RunAsync_SoftDeletedStack_RemovesStackAndEvents() { var organization = await _organizationRepository.AddAsync(_organizationData.GenerateSampleOrganization(_billingManager, _plans), o => o.ImmediateConsistency()); var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); @@ -137,7 +137,7 @@ public async Task CanCleanupSoftDeletedStack() } [Fact] - public async Task CanCleanupEventsOutsideOfRetentionPeriod() + public async Task RunAsync_EventsOutsideRetentionPeriod_RemovesExpiredEvents() { var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); _billingManager.ApplyBillingPlan(organization, _plans.FreePlan); @@ -159,7 +159,7 @@ public async Task CanCleanupEventsOutsideOfRetentionPeriod() } [Fact] - public async Task CanDeleteOrphanedEventsByStack() + public async Task DeleteOrphanedEventsByStack_WithLargeDataset_DeletesAllOrphanedEvents() { var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); @@ -725,4 +725,152 @@ public async Task EnforceRetentionAsync_ExpiredEvents_DoesNotIncrementDeletedUsa Assert.Equal(0, usageResponse.CurrentUsage.Deleted); Assert.Equal(0, usageResponse.CurrentHourUsage.Deleted); } + + [Fact] + public async Task CleanupSoftDeletedOrganizations_WithMultiplePages_RemovesAllData() + { + // Create more than the page size (5) of soft-deleted organizations to test pagination + var orgs = new List(); + for (int i = 0; i < 12; i++) + { + var org = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + org.Id = ObjectId.GenerateNewId().ToString(); + org.IsDeleted = true; + orgs.Add(org); + } + await _organizationRepository.AddAsync(orgs, o => o.ImmediateConsistency()); + + // Create associated projects and events for a subset + var project = _projectData.GenerateSampleProject(); + project.OrganizationId = orgs[0].Id; + await _projectRepository.AddAsync(project, o => o.ImmediateConsistency()); + + var stack = _stackData.GenerateStack(generateId: true, organizationId: orgs[0].Id, projectId: project.Id); + await _stackRepository.AddAsync(stack, o => o.ImmediateConsistency()); + + await _eventRepository.AddAsync(_eventData.GenerateEvents(10, orgs[0].Id, project.Id, stack.Id), o => o.ImmediateConsistency()); + + await _job.RunAsync(TestCancellationToken); + + // All soft-deleted orgs should be removed + foreach (var org in orgs) + Assert.Null(await _organizationRepository.GetByIdAsync(org.Id, o => o.IncludeSoftDeletes())); + + // Associated data should be gone too + Assert.Null(await _projectRepository.GetByIdAsync(project.Id, o => o.IncludeSoftDeletes())); + Assert.Null(await _stackRepository.GetByIdAsync(stack.Id, o => o.IncludeSoftDeletes())); + var eventCount = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes()); + Assert.Equal(0, eventCount); + } + + [Fact] + public async Task EnforceRetention_WithMultipleOrganizations_RespectsPerOrgRetention() + { + // Retention enforcement uses GetBillingPlanByUpsellingRetentionPeriod which returns the next + // plan with retention > org's retention. FreePlan (3d) → effective 30d, SmallPlan (30d) → effective 90d. + var org1 = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + org1.Id = ObjectId.GenerateNewId().ToString(); + _billingManager.ApplyBillingPlan(org1, _plans.SmallPlan); // effective retention: 90 days (next plan up is Large) + org1.StripeCustomerId = "cust_test1"; + org1.CardLast4 = "4242"; + org1.SubscribeDate = DateTime.UtcNow; + org1.BillingChangedByUserId = TestConstants.UserId; + + var org2 = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + org2.Id = ObjectId.GenerateNewId().ToString(); + _billingManager.ApplyBillingPlan(org2, _plans.FreePlan); // effective retention: 30 days (next plan up is Small) + await _organizationRepository.AddAsync(new[] { org1, org2 }, o => o.ImmediateConsistency()); + + var project1 = _projectData.GenerateProject(generateId: true, organizationId: org1.Id); + var project2 = _projectData.GenerateProject(generateId: true, organizationId: org2.Id); + await _projectRepository.AddAsync(new[] { project1, project2 }, o => o.ImmediateConsistency()); + + var stack1 = _stackData.GenerateStack(generateId: true, organizationId: org1.Id, projectId: project1.Id); + var stack2 = _stackData.GenerateStack(generateId: true, organizationId: org2.Id, projectId: project2.Id); + await _stackRepository.AddAsync(new[] { stack1, stack2 }, o => o.ImmediateConsistency()); + + // Events inside retention for both (2 days old) + var recentStart = DateTimeOffset.UtcNow.AddDays(-2); + var recentEnd = DateTimeOffset.UtcNow.AddDays(-1); + await _eventRepository.AddAsync(_eventData.GenerateEvents(10, org1.Id, project1.Id, stack1.Id, startDate: recentStart, endDate: recentEnd), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(10, org2.Id, project2.Id, stack2.Id, startDate: recentStart, endDate: recentEnd), o => o.ImmediateConsistency()); + + // Events at 35 days old: outside org2's effective retention (30d) but inside org1's (90d) + var olderStart = DateTimeOffset.UtcNow.AddDays(-37); + var olderEnd = DateTimeOffset.UtcNow.AddDays(-33); + await _eventRepository.AddAsync(_eventData.GenerateEvents(10, org1.Id, project1.Id, stack1.Id, startDate: olderStart, endDate: olderEnd), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(10, org2.Id, project2.Id, stack2.Id, startDate: olderStart, endDate: olderEnd), o => o.ImmediateConsistency()); + + await _job.RunAsync(TestCancellationToken); + await RefreshDataAsync(); + + // org1 should keep all events (within 90 day effective retention) + var org1Events = await _eventRepository.CountAsync(q => q.FilterExpression($"organization:{org1.Id}")); + Assert.Equal(20, org1Events); + + // org2 should only keep recent events (older ones outside 30 day effective retention) + var org2Events = await _eventRepository.CountAsync(q => q.FilterExpression($"organization:{org2.Id}")); + Assert.Equal(10, org2Events); + } + + [Fact] + public async Task CleanupSoftDeletedStacks_WithMultiplePages_RemovesAllStacks() + { + var organization = await _organizationRepository.AddAsync(_organizationData.GenerateSampleOrganization(_billingManager, _plans), o => o.ImmediateConsistency()); + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + + // Create more stacks than the page size (500) to test pagination + var stacks = new List(); + for (int i = 0; i < 600; i++) + { + var stack = _stackData.GenerateStack(generateId: true, organizationId: organization.Id, projectId: project.Id); + stack.IsDeleted = true; + stacks.Add(stack); + } + await _stackRepository.AddAsync(stacks, o => o.ImmediateConsistency()); + + // Create a valid non-deleted stack with events + var validStack = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: organization.Id, projectId: project.Id), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(5, organization.Id, project.Id, validStack.Id), o => o.ImmediateConsistency()); + + await _job.RunAsync(TestCancellationToken); + await RefreshDataAsync(); + + // All soft-deleted stacks should be removed + var remainingStacks = await _stackRepository.CountAsync(o => o.IncludeSoftDeletes()); + Assert.Equal(1, remainingStacks); + + // Valid events should remain + var eventCount = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes()); + Assert.Equal(5, eventCount); + } + + [Fact] + public async Task EnforceRetention_WithEventsOutsideRetention_DeletesOnlyExpiredEvents() + { + // FreePlan has 3 day retention, but enforcement uses GetBillingPlanByUpsellingRetentionPeriod + // which returns the next plan up (SmallPlan, 30 days). So effective retention = 30 days. + var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + _billingManager.ApplyBillingPlan(organization, _plans.FreePlan); + await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); + + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + var stack = await _stackRepository.AddAsync(_stackData.GenerateSampleStack(), o => o.ImmediateConsistency()); + + // Events outside effective retention (30 days) should be deleted + var outsideRetentionStart = DateTimeOffset.UtcNow.SubtractDays(37); + var outsideRetentionEnd = DateTimeOffset.UtcNow.SubtractDays(33); + await _eventRepository.AddAsync(_eventData.GenerateEvents(10, organization.Id, project.Id, stack.Id, startDate: outsideRetentionStart, endDate: outsideRetentionEnd), o => o.ImmediateConsistency()); + + // Events inside effective retention should be kept + var insideRetentionStart = DateTimeOffset.UtcNow.SubtractDays(2); + var insideRetentionEnd = DateTimeOffset.UtcNow.SubtractDays(1); + await _eventRepository.AddAsync(_eventData.GenerateEvents(10, organization.Id, project.Id, stack.Id, startDate: insideRetentionStart, endDate: insideRetentionEnd), o => o.ImmediateConsistency()); + + await _job.RunAsync(TestCancellationToken); + await RefreshDataAsync(); + + var eventCount = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes()); + Assert.Equal(10, eventCount); + } } diff --git a/tests/Exceptionless.Tests/Jobs/CleanupOrphanedDataJobTests.cs b/tests/Exceptionless.Tests/Jobs/CleanupOrphanedDataJobTests.cs index af3f405b16..4aafe47c1d 100644 --- a/tests/Exceptionless.Tests/Jobs/CleanupOrphanedDataJobTests.cs +++ b/tests/Exceptionless.Tests/Jobs/CleanupOrphanedDataJobTests.cs @@ -6,6 +6,7 @@ using Exceptionless.Tests.Utility; using Foundatio.Repositories; using Foundatio.Repositories.Utility; +using Foundatio.Utility; using Xunit; namespace Exceptionless.Tests.Jobs; @@ -42,7 +43,7 @@ public CleanupOrphanedDataJobTests(ITestOutputHelper output, AppWebHostFactory f [Fact] public async Task DeleteOrphanedEventsByStack_WithValidStack_DoesNotDeleteEvents() { - // Arrange - Two tenants, each with valid stacks and events + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); var organization2 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId2); await _organizationRepository.AddAsync([organization1, organization2], o => o.ImmediateConsistency()); @@ -62,7 +63,7 @@ public async Task DeleteOrphanedEventsByStack_WithValidStack_DoesNotDeleteEvents // Act await _job.RunAsync(TestCancellationToken); - // Assert - All events should remain (no orphans) + // Assert var totalCount = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(200, totalCount); } @@ -70,7 +71,7 @@ public async Task DeleteOrphanedEventsByStack_WithValidStack_DoesNotDeleteEvents [Fact] public async Task DeleteOrphanedEventsByStack_WithMixedOrphanedAndValid_OnlyDeletesOrphaned() { - // Arrange - Tenant 1 has valid events; Tenant 2 has orphaned events (stack doesn't exist) + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); var organization2 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId2); await _organizationRepository.AddAsync([organization1, organization2], o => o.ImmediateConsistency()); @@ -83,15 +84,10 @@ public async Task DeleteOrphanedEventsByStack_WithMixedOrphanedAndValid_OnlyDele var stack2 = _stackData.GenerateStack(generateId: true, organizationId: organization2.Id, projectId: project2.Id); await _stackRepository.AddAsync([stack1, stack2], o => o.ImmediateConsistency()); - // Valid events for both tenants var validEvents1 = _eventData.GenerateEvents(50, organization1.Id, project1.Id, stack1.Id).ToList(); var validEvents2 = _eventData.GenerateEvents(50, organization2.Id, project2.Id, stack2.Id).ToList(); - - // Orphaned events (stack IDs that don't exist) in both tenants - string fakeStackId1 = ObjectId.GenerateNewId().ToString(); - string fakeStackId2 = ObjectId.GenerateNewId().ToString(); - var orphanedEvents1 = _eventData.GenerateEvents(30, organization1.Id, project1.Id, fakeStackId1).ToList(); - var orphanedEvents2 = _eventData.GenerateEvents(20, organization2.Id, project2.Id, fakeStackId2).ToList(); + var orphanedEvents1 = _eventData.GenerateEvents(30, organization1.Id, project1.Id, ObjectId.GenerateNewId().ToString()).ToList(); + var orphanedEvents2 = _eventData.GenerateEvents(20, organization2.Id, project2.Id, ObjectId.GenerateNewId().ToString()).ToList(); await _eventRepository.AddAsync(validEvents1.Concat(validEvents2).Concat(orphanedEvents1).Concat(orphanedEvents2), o => o.ImmediateConsistency()); @@ -101,7 +97,7 @@ public async Task DeleteOrphanedEventsByStack_WithMixedOrphanedAndValid_OnlyDele // Act await _job.RunAsync(TestCancellationToken); - // Assert - Only valid events remain + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(100, totalAfter); } @@ -109,7 +105,7 @@ public async Task DeleteOrphanedEventsByStack_WithMixedOrphanedAndValid_OnlyDele [Fact] public async Task DeleteOrphanedEventsByStack_LargeVolume_PreservesAllValidEvents() { - // Arrange - Large volume across two tenants: 5000 valid + 10000 orphaned + // Arrange var organization = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); @@ -119,10 +115,8 @@ public async Task DeleteOrphanedEventsByStack_LargeVolume_PreservesAllValidEvent var stack = _stackData.GenerateStack(id: TestConstants.StackId, organizationId: organization.Id, projectId: project.Id); await _stackRepository.AddAsync(stack, o => o.ImmediateConsistency()); - // 5000 valid events for existing stack await _eventRepository.AddAsync(_eventData.GenerateEvents(5000, organization.Id, project.Id, stack.Id), o => o.ImmediateConsistency()); - // 10000 orphaned events with many different fake stack IDs var orphanedEvents = _eventData.GenerateEvents(10000, organization.Id, project.Id).ToList(); orphanedEvents.ForEach(e => e.StackId = ObjectId.GenerateNewId().ToString()); await _eventRepository.AddAsync(orphanedEvents, o => o.ImmediateConsistency()); @@ -133,15 +127,42 @@ public async Task DeleteOrphanedEventsByStack_LargeVolume_PreservesAllValidEvent // Act await _job.RunAsync(TestCancellationToken); - // Assert - Only the 5000 valid events remain + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(5000, totalAfter); } + [Fact] + public async Task DeleteOrphanedEventsByStack_WithManyUniqueOrphanedStacks_DeletesAllOrphanedEvents() + { + // Arrange + var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + + var stack = await _stackRepository.AddAsync(_stackData.GenerateSampleStack(), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(50, organization.Id, project.Id, stack.Id), o => o.ImmediateConsistency()); + + var orphanedEvents = _eventData.GenerateEvents(200, organization.Id, project.Id).ToList(); + var uniqueStackIds = Enumerable.Range(0, 200).Select(_ => ObjectId.GenerateNewId().ToString()).ToList(); + for (int i = 0; i < orphanedEvents.Count; i++) + orphanedEvents[i].StackId = uniqueStackIds[i]; + + await _eventRepository.AddAsync(orphanedEvents, o => o.ImmediateConsistency()); + + Assert.Equal(250, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + + // Act + await _job.RunAsync(TestCancellationToken); + + // Assert + Assert.Equal(50, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + } + [Fact] public async Task DeleteOrphanedEventsByStack_MultipleValidStacks_PreservesAll() { - // Arrange - Multiple valid stacks in two organizations, no orphans + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); var organization2 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId2); await _organizationRepository.AddAsync([organization1, organization2], o => o.ImmediateConsistency()); @@ -150,7 +171,6 @@ public async Task DeleteOrphanedEventsByStack_MultipleValidStacks_PreservesAll() var project2 = _projectData.GenerateProject(generateId: true, organizationId: organization2.Id); await _projectRepository.AddAsync([project1, project2], o => o.ImmediateConsistency()); - // Multiple stacks per project var stacks = new List(); for (int i = 0; i < 10; i++) { @@ -159,7 +179,6 @@ public async Task DeleteOrphanedEventsByStack_MultipleValidStacks_PreservesAll() } await _stackRepository.AddAsync(stacks, o => o.ImmediateConsistency()); - // Events across all stacks var events = new List(); foreach (var stack in stacks) events.AddRange(_eventData.GenerateEvents(10, stack.OrganizationId, stack.ProjectId, stack.Id)); @@ -168,7 +187,7 @@ public async Task DeleteOrphanedEventsByStack_MultipleValidStacks_PreservesAll() // Act await _job.RunAsync(TestCancellationToken); - // Assert - All 200 events preserved + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(200, totalAfter); } @@ -176,7 +195,7 @@ public async Task DeleteOrphanedEventsByStack_MultipleValidStacks_PreservesAll() [Fact] public async Task DeleteOrphanedEventsByStack_OnlyOrphanedEventsInOneTenant_OtherTenantUnaffected() { - // Arrange - Tenant 1 has all orphaned events (will be deleted); Tenant 2 has all valid events + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); var organization2 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId2); await _organizationRepository.AddAsync([organization1, organization2], o => o.ImmediateConsistency()); @@ -185,15 +204,11 @@ public async Task DeleteOrphanedEventsByStack_OnlyOrphanedEventsInOneTenant_Othe var project2 = _projectData.GenerateProject(generateId: true, organizationId: organization2.Id); await _projectRepository.AddAsync([project1, project2], o => o.ImmediateConsistency()); - // Tenant 2 has a valid stack var validStack = _stackData.GenerateStack(generateId: true, organizationId: organization2.Id, projectId: project2.Id); await _stackRepository.AddAsync(validStack, o => o.ImmediateConsistency()); - // Tenant 1 events are all orphaned (fake stack IDs) var orphanedEvents = _eventData.GenerateEvents(100, organization1.Id, project1.Id).ToList(); orphanedEvents.ForEach(e => e.StackId = ObjectId.GenerateNewId().ToString()); - - // Tenant 2 events are all valid var validEvents = _eventData.GenerateEvents(100, organization2.Id, project2.Id, validStack.Id).ToList(); await _eventRepository.AddAsync(orphanedEvents.Concat(validEvents), o => o.ImmediateConsistency()); @@ -201,11 +216,36 @@ public async Task DeleteOrphanedEventsByStack_OnlyOrphanedEventsInOneTenant_Othe // Act await _job.RunAsync(TestCancellationToken); - // Assert - Only tenant 2's events remain + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(100, totalAfter); } + [Fact] + public async Task DeleteOrphanedEventsByStack_WithSoftDeletedStack_DeletesOrphanedEvents() + { + // Arrange + var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + + var stack = await _stackRepository.AddAsync(_stackData.GenerateSampleStack(), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(50, organization.Id, project.Id, stack.Id), o => o.ImmediateConsistency()); + + var softDeletedStack = _stackData.GenerateStack(generateId: true, organizationId: organization.Id, projectId: project.Id); + softDeletedStack.IsDeleted = true; + softDeletedStack = await _stackRepository.AddAsync(softDeletedStack, o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(100, organization.Id, project.Id, softDeletedStack.Id), o => o.ImmediateConsistency()); + + Assert.Equal(150, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + + // Act + await _job.RunAsync(TestCancellationToken); + + // Assert + Assert.Equal(50, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + } + [Fact] public async Task DeleteOrphanedEventsByProject_WithValidProjects_DoesNotDeleteEvents() { @@ -236,7 +276,7 @@ public async Task DeleteOrphanedEventsByProject_WithValidProjects_DoesNotDeleteE [Fact] public async Task DeleteOrphanedEventsByProject_WithOrphanedProject_DeletesEventsForMissingProject() { - // Arrange - Events reference a project that doesn't exist + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); var organization2 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId2); await _organizationRepository.AddAsync([organization1, organization2], o => o.ImmediateConsistency()); @@ -247,10 +287,7 @@ public async Task DeleteOrphanedEventsByProject_WithOrphanedProject_DeletesEvent var validStack = _stackData.GenerateStack(id: TestConstants.StackId, organizationId: organization1.Id, projectId: validProject.Id); await _stackRepository.AddAsync(validStack, o => o.ImmediateConsistency()); - // Valid events for existing project var validEvents = _eventData.GenerateEvents(75, organization1.Id, validProject.Id, validStack.Id).ToList(); - - // Orphaned events referencing a non-existent project in organization 2 string fakeProjectId = ObjectId.GenerateNewId().ToString(); string fakeStackId = ObjectId.GenerateNewId().ToString(); var orphanedEvents = _eventData.GenerateEvents(50, organization2.Id, fakeProjectId, fakeStackId).ToList(); @@ -260,30 +297,50 @@ public async Task DeleteOrphanedEventsByProject_WithOrphanedProject_DeletesEvent // Act await _job.RunAsync(TestCancellationToken); - // Assert - Orphaned events deleted, valid events preserved + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(75, totalAfter); } + [Fact] + public async Task DeleteOrphanedEventsByProject_WithMissingProjects_DeletesOrphanedEvents() + { + // Arrange + var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + var stack = await _stackRepository.AddAsync(_stackData.GenerateSampleStack(), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(50, organization.Id, project.Id, stack.Id), o => o.ImmediateConsistency()); + + string fakeProjectId = ObjectId.GenerateNewId().ToString(); + var orphanedEvents = _eventData.GenerateEvents(100, organization.Id, fakeProjectId).ToList(); + orphanedEvents.ForEach(e => e.StackId = stack.Id); + await _eventRepository.AddAsync(orphanedEvents, o => o.ImmediateConsistency()); + + Assert.Equal(150, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + + // Act + await _job.RunAsync(TestCancellationToken); + + // Assert + Assert.Equal(50, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + } + [Fact] public async Task DeleteOrphanedEventsByProject_MultiTenant_EachTenantIndependent() { - // Arrange - Tenant 1 has valid project, Tenant 2 has orphaned project + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); var organization2 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId2); await _organizationRepository.AddAsync([organization1, organization2], o => o.ImmediateConsistency()); - // Only Tenant 1 has a real project var project1 = _projectData.GenerateProject(id: TestConstants.ProjectId, organizationId: organization1.Id); await _projectRepository.AddAsync(project1, o => o.ImmediateConsistency()); var stack1 = _stackData.GenerateStack(id: TestConstants.StackId, organizationId: organization1.Id, projectId: project1.Id); await _stackRepository.AddAsync(stack1, o => o.ImmediateConsistency()); - // Tenant 1 valid events var validEvents = _eventData.GenerateEvents(60, organization1.Id, project1.Id, stack1.Id).ToList(); - - // Tenant 2 orphaned events (project doesn't exist) string nonExistentProjectId = ObjectId.GenerateNewId().ToString(); string fakeStackId = ObjectId.GenerateNewId().ToString(); var orphanedEvents = _eventData.GenerateEvents(40, organization2.Id, nonExistentProjectId, fakeStackId).ToList(); @@ -298,6 +355,33 @@ public async Task DeleteOrphanedEventsByProject_MultiTenant_EachTenantIndependen Assert.Equal(60, totalAfter); } + [Fact] + public async Task DeleteOrphanedEventsByProject_WithSoftDeletedProject_DeletesOrphanedEvents() + { + // Arrange + var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); + + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + var stack = await _stackRepository.AddAsync(_stackData.GenerateSampleStack(), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(50, organization.Id, project.Id, stack.Id), o => o.ImmediateConsistency()); + + var softDeletedProject = _projectData.GenerateProject(generateId: true, organizationId: organization.Id); + softDeletedProject.IsDeleted = true; + softDeletedProject = await _projectRepository.AddAsync(softDeletedProject, o => o.ImmediateConsistency()); + var orphanedEvents = _eventData.GenerateEvents(100, organization.Id, softDeletedProject.Id).ToList(); + orphanedEvents.ForEach(e => e.StackId = stack.Id); + await _eventRepository.AddAsync(orphanedEvents, o => o.ImmediateConsistency()); + + Assert.Equal(150, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + + // Act + await _job.RunAsync(TestCancellationToken); + + // Assert + Assert.Equal(50, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + } + [Fact] public async Task DeleteOrphanedEventsByOrganization_WithValidOrganizations_DoesNotDeleteEvents() { @@ -321,7 +405,7 @@ public async Task DeleteOrphanedEventsByOrganization_WithValidOrganizations_Does // Act await _job.RunAsync(TestCancellationToken); - // Assert - All events preserved (both organizations exist) + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(160, totalAfter); } @@ -329,7 +413,7 @@ public async Task DeleteOrphanedEventsByOrganization_WithValidOrganizations_Does [Fact] public async Task DeleteOrphanedEventsByOrganization_WithOrphanedOrganization_DeletesEventsForMissingOrganization() { - // Arrange - Valid organization1 + events referencing non-existent organization + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); await _organizationRepository.AddAsync(organization1, o => o.ImmediateConsistency()); @@ -339,10 +423,7 @@ public async Task DeleteOrphanedEventsByOrganization_WithOrphanedOrganization_De var stack = _stackData.GenerateStack(id: TestConstants.StackId, organizationId: organization1.Id, projectId: project.Id); await _stackRepository.AddAsync(stack, o => o.ImmediateConsistency()); - // Valid events var validEvents = _eventData.GenerateEvents(100, organization1.Id, project.Id, stack.Id).ToList(); - - // Orphaned events referencing a non-existent organization string fakeOrganizationId = ObjectId.GenerateNewId().ToString(); string fakeProjectId = ObjectId.GenerateNewId().ToString(); string fakeStackId = ObjectId.GenerateNewId().ToString(); @@ -353,15 +434,39 @@ public async Task DeleteOrphanedEventsByOrganization_WithOrphanedOrganization_De // Act await _job.RunAsync(TestCancellationToken); - // Assert - Only valid events survive + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(100, totalAfter); } + [Fact] + public async Task DeleteOrphanedEventsByOrganization_WithMissingOrganizations_DeletesOrphanedEvents() + { + // Arrange + var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + var stack = await _stackRepository.AddAsync(_stackData.GenerateSampleStack(), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(50, organization.Id, project.Id, stack.Id), o => o.ImmediateConsistency()); + + string fakeOrganizationId = ObjectId.GenerateNewId().ToString(); + var orphanedEvents = _eventData.GenerateEvents(100, fakeOrganizationId, project.Id).ToList(); + orphanedEvents.ForEach(e => e.StackId = stack.Id); + await _eventRepository.AddAsync(orphanedEvents, o => o.ImmediateConsistency()); + + Assert.Equal(150, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + + // Act + await _job.RunAsync(TestCancellationToken); + + // Assert + Assert.Equal(50, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + } + [Fact] public async Task DeleteOrphanedEventsByOrganization_TwoTenantsOneDeleted_OnlyDeletesOrphanedTenantEvents() { - // Arrange - Organization 1 exists, Organization 2 does NOT exist (never created) but has events + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); await _organizationRepository.AddAsync(organization1, o => o.ImmediateConsistency()); @@ -371,10 +476,7 @@ public async Task DeleteOrphanedEventsByOrganization_TwoTenantsOneDeleted_OnlyDe var stack1 = _stackData.GenerateStack(id: TestConstants.StackId, organizationId: organization1.Id, projectId: project1.Id); await _stackRepository.AddAsync(stack1, o => o.ImmediateConsistency()); - // Tenant 1 valid events var validEvents = _eventData.GenerateEvents(120, organization1.Id, project1.Id, stack1.Id).ToList(); - - // Tenant 2 events (organization doesn't exist, simulates post-hard-delete orphans) string ghostOrganizationId = ObjectId.GenerateNewId().ToString(); string ghostProjectId = ObjectId.GenerateNewId().ToString(); string ghostStackId = ObjectId.GenerateNewId().ToString(); @@ -390,17 +492,43 @@ public async Task DeleteOrphanedEventsByOrganization_TwoTenantsOneDeleted_OnlyDe Assert.Equal(120, totalAfter); } + [Fact] + public async Task DeleteOrphanedEventsByOrganization_WithSoftDeletedOrganization_DeletesOrphanedEvents() + { + // Arrange + var validOrganization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + await _organizationRepository.AddAsync(validOrganization, o => o.ImmediateConsistency()); + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + var stack = await _stackRepository.AddAsync(_stackData.GenerateSampleStack(), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(50, validOrganization.Id, project.Id, stack.Id), o => o.ImmediateConsistency()); + + var softDeletedOrganization = _organizationData.GenerateOrganization(_billingManager, _plans, generateId: true); + softDeletedOrganization.IsDeleted = true; + softDeletedOrganization = await _organizationRepository.AddAsync(softDeletedOrganization, o => o.ImmediateConsistency()); + var orphanedEvents = _eventData.GenerateEvents(100, softDeletedOrganization.Id, project.Id).ToList(); + orphanedEvents.ForEach(e => e.StackId = stack.Id); + await _eventRepository.AddAsync(orphanedEvents, o => o.ImmediateConsistency()); + + Assert.Equal(150, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + + // Act + await _job.RunAsync(TestCancellationToken); + + // Assert + Assert.Equal(50, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + } + [Fact] public async Task FixDuplicateStacks_WithDuplicatesAcrossTenants_MergesCorrectly() { - // Arrange - Two stacks in the same project with the same signature (duplicate) + // Arrange var organization = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); var project = _projectData.GenerateProject(id: TestConstants.ProjectId, organizationId: organization.Id); await _projectRepository.AddAsync(project, o => o.ImmediateConsistency()); - string signatureHash = "abc123def456"; + const string signatureHash = "abc123def456"; var stack1 = _stackData.GenerateStack(generateId: true, organizationId: organization.Id, projectId: project.Id, signatureHash: signatureHash); stack1.CreatedUtc = DateTime.UtcNow.AddDays(-10); stack1.TotalOccurrences = 5; @@ -409,7 +537,6 @@ public async Task FixDuplicateStacks_WithDuplicatesAcrossTenants_MergesCorrectly stack2.TotalOccurrences = 10; await _stackRepository.AddAsync([stack1, stack2], o => o.ImmediateConsistency()); - // Events on both stacks var events1 = _eventData.GenerateEvents(3, organization.Id, project.Id, stack1.Id).ToList(); var events2 = _eventData.GenerateEvents(7, organization.Id, project.Id, stack2.Id).ToList(); await _eventRepository.AddAsync(events1.Concat(events2), o => o.ImmediateConsistency()); @@ -417,7 +544,7 @@ public async Task FixDuplicateStacks_WithDuplicatesAcrossTenants_MergesCorrectly // Act await _job.RunAsync(TestCancellationToken); - // Assert - One stack should be deleted, all events should point to the surviving stack + // Assert await RefreshDataAsync(); var allStacks = await _stackRepository.GetAllAsync(o => o.IncludeSoftDeletes()); var activeStacks = allStacks.Documents.Where(s => !s.IsDeleted).ToList(); @@ -425,16 +552,53 @@ public async Task FixDuplicateStacks_WithDuplicatesAcrossTenants_MergesCorrectly Assert.Single(activeStacks); Assert.Single(deletedStacks); - // All events should now reference the surviving stack var allEvents = await _eventRepository.GetAllAsync(); Assert.Equal(10, allEvents.Total); Assert.All(allEvents.Documents, e => Assert.Equal(activeStacks[0].Id, e.StackId)); } + [Fact] + public async Task FixDuplicateStacks_WithDuplicateSignatures_MergesIntoMostPopularStack() + { + // Arrange + var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + + var originalStack = _stackData.GenerateStack(generateId: true, organizationId: organization.Id, projectId: project.Id); + originalStack.TotalOccurrences = 100; + var duplicateStack = originalStack.DeepClone(); + duplicateStack.Id = ObjectId.GenerateNewId().ToString(); + duplicateStack.TotalOccurrences = 10; + + originalStack = await _stackRepository.AddAsync(originalStack, o => o.ImmediateConsistency()); + duplicateStack = await _stackRepository.AddAsync(duplicateStack, o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(100, organization.Id, project.Id, originalStack.Id), o => o.ImmediateConsistency()); + await _eventRepository.AddAsync(_eventData.GenerateEvents(10, organization.Id, project.Id, duplicateStack.Id), o => o.ImmediateConsistency()); + + // Act + await _job.RunAsync(TestCancellationToken); + await RefreshDataAsync(); + + // Assert + var updatedOriginal = await _stackRepository.GetByIdAsync(originalStack.Id, o => o.IncludeSoftDeletes()); + var updatedDuplicate = await _stackRepository.GetByIdAsync(duplicateStack.Id, o => o.IncludeSoftDeletes()); + + Assert.NotNull(updatedOriginal); + Assert.NotNull(updatedDuplicate); + Assert.False(updatedOriginal.IsDeleted); + Assert.True(updatedDuplicate.IsDeleted); + Assert.Equal(110, updatedOriginal.TotalOccurrences); + + Assert.Equal(110, await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency())); + Assert.Equal(110, await _eventRepository.CountAsync(q => q.Stack(originalStack.Id))); + Assert.Equal(0, await _eventRepository.CountAsync(q => q.Stack(duplicateStack.Id))); + } + [Fact] public async Task FixDuplicateStacks_NoDuplicates_DoesNotModifyAnything() { - // Arrange - Two stacks with different signatures across two tenants + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); var organization2 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId2); await _organizationRepository.AddAsync([organization1, organization2], o => o.ImmediateConsistency()); @@ -454,7 +618,7 @@ public async Task FixDuplicateStacks_NoDuplicates_DoesNotModifyAnything() // Act await _job.RunAsync(TestCancellationToken); - // Assert - Nothing deleted + // Assert var allStacks = await _stackRepository.GetAllAsync(o => o.IncludeSoftDeletes()); Assert.Equal(2, allStacks.Total); Assert.All(allStacks.Documents, s => Assert.False(s.IsDeleted)); @@ -463,10 +627,41 @@ public async Task FixDuplicateStacks_NoDuplicates_DoesNotModifyAnything() Assert.Equal(40, totalEvents); } + [Fact] + public async Task FixDuplicateStacks_WithNoEvents_KeepsOldestStack() + { + // Arrange + var organization = _organizationData.GenerateSampleOrganization(_billingManager, _plans); + await _organizationRepository.AddAsync(organization, o => o.ImmediateConsistency()); + var project = await _projectRepository.AddAsync(_projectData.GenerateSampleProject(), o => o.ImmediateConsistency()); + + var originalStack = _stackData.GenerateStack(generateId: true, organizationId: organization.Id, projectId: project.Id); + originalStack.CreatedUtc = DateTime.UtcNow.AddMinutes(-10); + var duplicateStack = originalStack.DeepClone(); + duplicateStack.Id = ObjectId.GenerateNewId().ToString(); + duplicateStack.CreatedUtc = originalStack.CreatedUtc.AddMinutes(1); + + originalStack = await _stackRepository.AddAsync(originalStack, o => o.ImmediateConsistency()); + duplicateStack = await _stackRepository.AddAsync(duplicateStack, o => o.ImmediateConsistency()); + + // Act + await _job.RunAsync(TestCancellationToken); + await RefreshDataAsync(); + + // Assert + var updatedOriginal = await _stackRepository.GetByIdAsync(originalStack.Id, o => o.IncludeSoftDeletes()); + var updatedDuplicate = await _stackRepository.GetByIdAsync(duplicateStack.Id, o => o.IncludeSoftDeletes()); + + Assert.NotNull(updatedOriginal); + Assert.NotNull(updatedDuplicate); + Assert.False(updatedOriginal.IsDeleted); + Assert.True(updatedDuplicate.IsDeleted); + } + [Fact] public async Task RunAsync_AllOrphanTypes_CleansUpCorrectly() { - // Arrange - Complex scenario: valid data, orphaned by stack, orphaned by project, orphaned by organization + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); await _organizationRepository.AddAsync(organization1, o => o.ImmediateConsistency()); @@ -476,23 +671,10 @@ public async Task RunAsync_AllOrphanTypes_CleansUpCorrectly() var validStack = _stackData.GenerateStack(id: TestConstants.StackId, organizationId: organization1.Id, projectId: project1.Id); await _stackRepository.AddAsync(validStack, o => o.ImmediateConsistency()); - // 100 valid events var validEvents = _eventData.GenerateEvents(100, organization1.Id, project1.Id, validStack.Id).ToList(); - - // 25 orphaned by stack (stack doesn't exist) - string fakeStack = ObjectId.GenerateNewId().ToString(); - var orphanedByStack = _eventData.GenerateEvents(25, organization1.Id, project1.Id, fakeStack).ToList(); - - // 25 orphaned by project (project doesn't exist) - string fakeProject = ObjectId.GenerateNewId().ToString(); - string fakeStack2 = ObjectId.GenerateNewId().ToString(); - var orphanedByProject = _eventData.GenerateEvents(25, organization1.Id, fakeProject, fakeStack2).ToList(); - - // 25 orphaned by organization (organization doesn't exist) - string fakeOrganizationId = ObjectId.GenerateNewId().ToString(); - string fakeProject2 = ObjectId.GenerateNewId().ToString(); - string fakeStack3 = ObjectId.GenerateNewId().ToString(); - var orphanedByOrganization = _eventData.GenerateEvents(25, fakeOrganizationId, fakeProject2, fakeStack3).ToList(); + var orphanedByStack = _eventData.GenerateEvents(25, organization1.Id, project1.Id, ObjectId.GenerateNewId().ToString()).ToList(); + var orphanedByProject = _eventData.GenerateEvents(25, organization1.Id, ObjectId.GenerateNewId().ToString(), ObjectId.GenerateNewId().ToString()).ToList(); + var orphanedByOrganization = _eventData.GenerateEvents(25, ObjectId.GenerateNewId().ToString(), ObjectId.GenerateNewId().ToString(), ObjectId.GenerateNewId().ToString()).ToList(); await _eventRepository.AddAsync(validEvents.Concat(orphanedByStack).Concat(orphanedByProject).Concat(orphanedByOrganization), o => o.ImmediateConsistency()); @@ -502,7 +684,7 @@ public async Task RunAsync_AllOrphanTypes_CleansUpCorrectly() // Act await _job.RunAsync(TestCancellationToken); - // Assert - Only 100 valid events remain + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(100, totalAfter); } @@ -510,7 +692,7 @@ public async Task RunAsync_AllOrphanTypes_CleansUpCorrectly() [Fact] public async Task RunAsync_NoOrphans_PreservesEverything() { - // Arrange - Two complete tenants, no orphans anywhere + // Arrange var organization1 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId); var organization2 = _organizationData.GenerateOrganization(_billingManager, _plans, id: TestConstants.OrganizationId2); await _organizationRepository.AddAsync([organization1, organization2], o => o.ImmediateConsistency()); @@ -530,7 +712,7 @@ public async Task RunAsync_NoOrphans_PreservesEverything() // Act await _job.RunAsync(TestCancellationToken); - // Assert - All 400 events preserved + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(400, totalAfter); } @@ -538,11 +720,12 @@ public async Task RunAsync_NoOrphans_PreservesEverything() [Fact] public async Task RunAsync_EmptyDatabase_CompletesWithoutError() { - // Arrange - nothing + // Arrange - // Act & Assert - should not throw + // Act await _job.RunAsync(TestCancellationToken); + // Assert var totalAfter = await _eventRepository.CountAsync(o => o.IncludeSoftDeletes().ImmediateConsistency()); Assert.Equal(0, totalAfter); } diff --git a/tests/Exceptionless.Tests/Repositories/EventRepositoryTests.cs b/tests/Exceptionless.Tests/Repositories/EventRepositoryTests.cs index 95c5d2b628..7d146b0ca9 100644 --- a/tests/Exceptionless.Tests/Repositories/EventRepositoryTests.cs +++ b/tests/Exceptionless.Tests/Repositories/EventRepositoryTests.cs @@ -275,4 +275,183 @@ private async Task CreateDataAsync() _ids.Add(Tuple.Create(ev.Id, date)); } } + + [Fact] + public async Task GetDistinctStackIds_WithMultipleStacks_ReturnsAllUniqueIds() + { + // Arrange + var stack1 = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + var stack2 = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + + await _repository.AddAsync(_eventData.GenerateEvents(5, TestConstants.OrganizationId, TestConstants.ProjectId, stack1.Id), o => o.ImmediateConsistency()); + await _repository.AddAsync(_eventData.GenerateEvents(3, TestConstants.OrganizationId, TestConstants.ProjectId, stack2.Id), o => o.ImmediateConsistency()); + + // Act + var afterKey = new CompositeKeyResult(); + var stackIds = await _repository.GetDistinctStackIdsAsync(10000, afterKey); + + // Assert + Assert.Equal(stackIds.Count, stackIds.Distinct(StringComparer.Ordinal).Count()); + Assert.Contains(stack1.Id, stackIds); + Assert.Contains(stack2.Id, stackIds); + } + + [Fact] + public async Task GetDistinctStackIds_WithPagination_ReturnsAllIds() + { + // Arrange + var stacks = new List(); + for (int i = 0; i < 5; i++) + stacks.Add(await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency())); + + foreach (var stack in stacks) + await _repository.AddAsync(_eventData.GenerateEvents(2, TestConstants.OrganizationId, TestConstants.ProjectId, stack.Id), o => o.ImmediateConsistency()); + + // Act - page through with batch size of 2 + var allIds = new List(); + var afterKey = new CompositeKeyResult(); + IReadOnlyCollection batch; + do + { + batch = await _repository.GetDistinctStackIdsAsync(2, afterKey); + allIds.AddRange(batch); + } while (afterKey.AfterKey.Count > 0); + + // Assert + Assert.Equal(allIds.Count, allIds.Distinct(StringComparer.Ordinal).Count()); + foreach (var stack in stacks) + Assert.Contains(stack.Id, allIds); + } + + [Fact] + public async Task ReassignStack_WithSourceEvents_MovesAllEventsToTarget() + { + // Arrange + var stack1 = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + var stack2 = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + + await _repository.AddAsync(_eventData.GenerateEvents(10, TestConstants.OrganizationId, TestConstants.ProjectId, stack1.Id), o => o.ImmediateConsistency()); + await _repository.AddAsync(_eventData.GenerateEvents(5, TestConstants.OrganizationId, TestConstants.ProjectId, stack2.Id), o => o.ImmediateConsistency()); + + // Act + long affected = await _repository.ReassignStackAsync([stack1.Id], stack2.Id); + + // Assert + Assert.Equal(10, affected); + + await RefreshDataAsync(); + + Assert.Equal(0, await _repository.CountAsync(q => q.Stack(stack1.Id))); + Assert.Equal(15, await _repository.CountAsync(q => q.Stack(stack2.Id))); + } + + [Fact] + public async Task RemoveAllByProjectIds_WithMatchingEvents_RemovesAll() + { + // Arrange + var stack = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + await _repository.AddAsync(_eventData.GenerateEvents(10, TestConstants.OrganizationId, TestConstants.ProjectId, stack.Id), o => o.ImmediateConsistency()); + + // Act + long removed = await _repository.RemoveAllByProjectIdsAsync([TestConstants.ProjectId]); + + // Assert + Assert.Equal(10, removed); + + await RefreshDataAsync(); + Assert.Equal(0, await _repository.CountAsync(o => o.IncludeSoftDeletes())); + } + + [Fact] + public async Task RemoveAllByOrganizationIds_WithMatchingEvents_RemovesAll() + { + // Arrange + var stack = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + await _repository.AddAsync(_eventData.GenerateEvents(10, TestConstants.OrganizationId, TestConstants.ProjectId, stack.Id), o => o.ImmediateConsistency()); + + // Act + long removed = await _repository.RemoveAllByOrganizationIdsAsync([TestConstants.OrganizationId]); + + // Assert + Assert.Equal(10, removed); + + await RefreshDataAsync(); + Assert.Equal(0, await _repository.CountAsync(o => o.IncludeSoftDeletes())); + } + + [Fact] + public async Task RemoveAllByStackIds_WithMatchingEvents_RemovesAll() + { + // Arrange + var stack = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + await _repository.AddAsync(_eventData.GenerateEvents(10, TestConstants.OrganizationId, TestConstants.ProjectId, stack.Id), o => o.ImmediateConsistency()); + + // Act + long removed = await _repository.RemoveAllByStackIdsAsync([stack.Id]); + + // Assert + Assert.Equal(10, removed); + + await RefreshDataAsync(); + Assert.Equal(0, await _repository.CountAsync(o => o.IncludeSoftDeletes())); + } + + [Fact] + public async Task ReassignStack_WithEmptySourceIds_ReturnsZeroWithoutModification() + { + // Arrange + var stack1 = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + var stack2 = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + await _repository.AddAsync(_eventData.GenerateEvents(10, TestConstants.OrganizationId, TestConstants.ProjectId, stack1.Id), o => o.ImmediateConsistency()); + + // Act - empty source list must be a no-op; an unchecked empty .Stack() filter would patch ALL events + long affected = await _repository.ReassignStackAsync([], stack2.Id); + + // Assert + Assert.Equal(0, affected); + + await RefreshDataAsync(); + Assert.Equal(10, await _repository.CountAsync(q => q.Stack(stack1.Id))); + Assert.Equal(0, await _repository.CountAsync(q => q.Stack(stack2.Id))); + } + + [Fact] + public async Task GetDistinctProjectIds_WithMultipleProjects_ReturnsAllUniqueIds() + { + // Arrange + var stack = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + string project2Id = ObjectId.GenerateNewId().ToString(); + + await _repository.AddAsync(_eventData.GenerateEvents(3, TestConstants.OrganizationId, TestConstants.ProjectId, stack.Id), o => o.ImmediateConsistency()); + await _repository.AddAsync(_eventData.GenerateEvents(2, TestConstants.OrganizationId, project2Id, stack.Id), o => o.ImmediateConsistency()); + + // Act + var afterKey = new CompositeKeyResult(); + var projectIds = await _repository.GetDistinctProjectIdsAsync(10000, afterKey); + + // Assert + Assert.Equal(projectIds.Count, projectIds.Distinct(StringComparer.Ordinal).Count()); + Assert.Contains(TestConstants.ProjectId, projectIds); + Assert.Contains(project2Id, projectIds); + } + + [Fact] + public async Task GetDistinctOrganizationIds_WithMultipleOrganizations_ReturnsAllUniqueIds() + { + // Arrange + var stack = await _stackRepository.AddAsync(_stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: TestConstants.ProjectId), o => o.ImmediateConsistency()); + string org2Id = ObjectId.GenerateNewId().ToString(); + + await _repository.AddAsync(_eventData.GenerateEvents(3, TestConstants.OrganizationId, TestConstants.ProjectId, stack.Id), o => o.ImmediateConsistency()); + await _repository.AddAsync(_eventData.GenerateEvents(2, org2Id, TestConstants.ProjectId, stack.Id), o => o.ImmediateConsistency()); + + // Act + var afterKey = new CompositeKeyResult(); + var orgIds = await _repository.GetDistinctOrganizationIdsAsync(10000, afterKey); + + // Assert + Assert.Equal(orgIds.Count, orgIds.Distinct(StringComparer.Ordinal).Count()); + Assert.Contains(TestConstants.OrganizationId, orgIds); + Assert.Contains(org2Id, orgIds); + } } diff --git a/tests/Exceptionless.Tests/Repositories/StackRepositoryTests.cs b/tests/Exceptionless.Tests/Repositories/StackRepositoryTests.cs index 54b4093187..76270de5c3 100644 --- a/tests/Exceptionless.Tests/Repositories/StackRepositoryTests.cs +++ b/tests/Exceptionless.Tests/Repositories/StackRepositoryTests.cs @@ -8,7 +8,9 @@ using Foundatio.Caching; using Foundatio.Repositories; using Foundatio.Repositories.Options; +using Foundatio.Repositories.Utility; using Foundatio.Serializer; +using Foundatio.Utility; using Xunit; namespace Exceptionless.Tests.Repositories; @@ -279,4 +281,58 @@ await _repository.AddAsync( Assert.NotNull(stacks.Documents.SingleOrDefault(s => String.Equals(s.Id, TestConstants.StackId))); Assert.NotNull(stacks.Documents.SingleOrDefault(s => String.Equals(s.Id, TestConstants.StackId2))); } + + [Fact] + public async Task GetDuplicateSignatures_WithDuplicates_ReturnsSignatures() + { + string uniqueProjectId = ObjectId.GenerateNewId().ToString(); + var stack1 = _stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: uniqueProjectId); + stack1.DuplicateSignature = $"{uniqueProjectId}:dup_sig_test"; + + var stack2 = stack1.DeepClone(); + stack2.Id = ObjectId.GenerateNewId().ToString(); + + await _repository.AddAsync(new[] { stack1, stack2 }, o => o.ImmediateConsistency()); + + var duplicates = await _repository.GetDuplicateSignaturesAsync(); + Assert.Contains($"{uniqueProjectId}:dup_sig_test", duplicates); + } + + [Fact] + public async Task GetDuplicateSignatures_WithNoDuplicates_ReturnsEmpty() + { + // Use a unique project ID to avoid interference from pre-existing sample data + string uniqueProjectId = ObjectId.GenerateNewId().ToString(); + var stack1 = _stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: uniqueProjectId); + stack1.DuplicateSignature = $"{uniqueProjectId}:unique_sig_1"; + + var stack2 = _stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: uniqueProjectId); + stack2.DuplicateSignature = $"{uniqueProjectId}:unique_sig_2"; + + await _repository.AddAsync(new[] { stack1, stack2 }, o => o.ImmediateConsistency()); + + var duplicates = await _repository.GetDuplicateSignaturesAsync(); + // Should not contain our unique signatures since they each appear only once + Assert.DoesNotContain($"{uniqueProjectId}:unique_sig_1", duplicates); + Assert.DoesNotContain($"{uniqueProjectId}:unique_sig_2", duplicates); + } + + [Fact] + public async Task GetDuplicateSignatures_WithSoftDeletedStacks_ExcludesThem() + { + // Use a unique project ID to avoid interference from pre-existing sample data + string uniqueProjectId = ObjectId.GenerateNewId().ToString(); + var stack1 = _stackData.GenerateStack(generateId: true, organizationId: TestConstants.OrganizationId, projectId: uniqueProjectId); + stack1.DuplicateSignature = $"{uniqueProjectId}:softdelete_sig"; + + var stack2 = stack1.DeepClone(); + stack2.Id = ObjectId.GenerateNewId().ToString(); + stack2.IsDeleted = true; + + await _repository.AddAsync(new[] { stack1, stack2 }, o => o.ImmediateConsistency()); + + var duplicates = await _repository.GetDuplicateSignaturesAsync(); + // The soft-deleted stack should be excluded, leaving only 1 stack with this signature + Assert.DoesNotContain($"{uniqueProjectId}:softdelete_sig", duplicates); + } }