diff --git a/services/apps/merge_suggestions_worker/src/activities/memberMergeSuggestions.ts b/services/apps/merge_suggestions_worker/src/activities/memberMergeSuggestions.ts index 4ddbdf3572..474ceb7c11 100644 --- a/services/apps/merge_suggestions_worker/src/activities/memberMergeSuggestions.ts +++ b/services/apps/merge_suggestions_worker/src/activities/memberMergeSuggestions.ts @@ -207,6 +207,7 @@ export async function getMemberMergeSuggestions( query: value, prefix_length: 1, fuzziness: 'auto', + max_expansions: 3, }, }, }), diff --git a/services/apps/merge_suggestions_worker/src/activities/organizationMergeSuggestions.ts b/services/apps/merge_suggestions_worker/src/activities/organizationMergeSuggestions.ts index b054f752dd..a01f426b09 100644 --- a/services/apps/merge_suggestions_worker/src/activities/organizationMergeSuggestions.ts +++ b/services/apps/merge_suggestions_worker/src/activities/organizationMergeSuggestions.ts @@ -155,10 +155,10 @@ export async function getOrganizationMergeSuggestions( cleaned = cleaned.split(':').pop() || cleaned } - return cleaned + return cleaned.toLowerCase() } - // Process up to 100 identities + // Process up to 75 identities // This is a safety limit to prevent OpenSearch max clause errors for (let i = 0; i < Math.min(identities.length, 75); i++) { const { value: rawValue, platform } = identities[i] @@ -187,7 +187,7 @@ export async function getOrganizationMergeSuggestions( // Build OpenSearch query clauses const identitiesShould = [] - const CHUNK_SIZE = 20 // Split queries into chunks to avoid OpenSearch limits + const CHUNK_SIZE = 15 // Split queries into chunks to avoid OpenSearch limits const clauseBuilders: OpenSearchQueryClauseBuilder>[] = [ { @@ -212,6 +212,7 @@ export async function getOrganizationMergeSuggestions( query: value, prefix_length: 1, fuzziness: 'auto', + max_expansions: 3, }, }, }), @@ -224,6 +225,7 @@ export async function getOrganizationMergeSuggestions( prefix: { [`nested_identities.string_value`]: { value, + rewrite: 'top_terms_3', }, }, }), diff --git a/services/apps/merge_suggestions_worker/src/workflows.ts b/services/apps/merge_suggestions_worker/src/workflows.ts index 488e8d71a3..c23ace7611 100644 --- a/services/apps/merge_suggestions_worker/src/workflows.ts +++ b/services/apps/merge_suggestions_worker/src/workflows.ts @@ -5,6 +5,7 @@ import { mergeOrganizationsWithLLM } from './workflows/mergeOrganizationsWithLLM import { spawnMemberMergeSuggestionsForAllTenants } from './workflows/spawnMemberMergeSuggestionsForAllTenants' import { spawnOrganizationMergeSuggestionsForAllTenants } from './workflows/spawnOrganizationMergeSuggestionsForAllTenants' import { testMergingEntitiesWithLLM } from './workflows/testMergingEntitiesWithLLM' +import { testOrganizationMergeSuggestions } from './workflows/testOrganizationMergeSuggestions' export { generateMemberMergeSuggestions, @@ -14,4 +15,5 @@ export { testMergingEntitiesWithLLM, mergeOrganizationsWithLLM, mergeMembersWithLLM, + testOrganizationMergeSuggestions, } diff --git a/services/apps/merge_suggestions_worker/src/workflows/testOrganizationMergeSuggestions.ts b/services/apps/merge_suggestions_worker/src/workflows/testOrganizationMergeSuggestions.ts new file mode 100644 index 0000000000..ce2b8237b8 --- /dev/null +++ b/services/apps/merge_suggestions_worker/src/workflows/testOrganizationMergeSuggestions.ts @@ -0,0 +1,50 @@ +import { proxyActivities } from '@temporalio/workflow' + +import { IOrganizationBaseForMergeSuggestions, IOrganizationMergeSuggestion } from '@crowd/types' + +import * as activities from '../activities/organizationMergeSuggestions' +import { IProcessGenerateOrganizationMergeSuggestionsArgs } from '../types' +import { chunkArray } from '../utils' + +const activity = proxyActivities({ startToCloseTimeout: '1 minute' }) + +export async function testOrganizationMergeSuggestions( + args: IProcessGenerateOrganizationMergeSuggestionsArgs, +): Promise { + const PAGE_SIZE = 25 + const PARALLEL_SUGGESTION_PROCESSING = 50 + + const result: IOrganizationBaseForMergeSuggestions[] = await activity.getOrganizations( + args.tenantId, + PAGE_SIZE, + null, + null, + args.organizationIds, + ) + + if (result.length === 0) { + console.log('No organizations found for merge suggestion test!') + return + } + + const allMergeSuggestions: IOrganizationMergeSuggestion[] = [] + + const promiseChunks = chunkArray(result, PARALLEL_SUGGESTION_PROCESSING) + + for (const chunk of promiseChunks) { + const mergeSuggestionsPromises: Promise[] = chunk.map( + (organization) => activity.getOrganizationMergeSuggestions(args.tenantId, organization), + ) + + const mergeSuggestionsResults: IOrganizationMergeSuggestion[][] = + await Promise.all(mergeSuggestionsPromises) + allMergeSuggestions.push(...mergeSuggestionsResults.flat()) + } + + // Add all merge suggestions to add to merge + if (allMergeSuggestions.length > 0) { + console.log('Found merge suggestions!', allMergeSuggestions) + } else { + console.log('No merge suggestions found for provided organizations!') + } +}