diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts index 8fa02f4c5e81..a1ffb3ee5630 100644 --- a/frontend/__tests__/utils/strings.spec.ts +++ b/frontend/__tests__/utils/strings.spec.ts @@ -587,4 +587,304 @@ describe("string utils", () => { }); }); }); + + describe("Accent pattern rules", () => { + const commonRules = Strings.__testing.ACCENT_RULES; + const languageRules = Object.values( + Strings.__testing.LANGUAGE_ACCENT_RULES, + ); + const allRules: string[][] = [...languageRules.flat(), ...commonRules]; + const allPatterns = allRules.flat(); + + // correct unicode length + const ulen = (s: string) => Array.from(s).length; + + it("each rule has at least 2 patterns", () => { + for (const rule of allRules) { + expect(rule.length).toBeGreaterThanOrEqual(2); + } + }); + + it("each pattern has at least 2 unicode characters", () => { + for (const pattern of allPatterns) { + expect(ulen(pattern)).toBeGreaterThanOrEqual(2); + } + }); + + it("all patterns across common rules are distinct", () => { + const commonPatterns = commonRules.flat(); + const set = new Set(commonPatterns); + expect(set.size).toBe(commonPatterns.length); + }); + + it("all patterns across each language rules are distinct", () => { + for (const lang of languageRules) { + const languagePatterns = lang.flat(); + const set = new Set(languagePatterns); + expect(set.size).toBe(languagePatterns.length); + } + }); + + it("patterns inside each rule have the same unicode length", () => { + for (const rule of allRules) { + const lengths = rule.map(ulen); + const first = lengths[0]; + for (const len of lengths) { + expect(len).toBe(first); + } + } + }); + + it("common rules are sorted from longest pattern to shortest", () => { + const patternLengths = commonRules.map((rule) => ulen(rule[0]!)); + for (let i = 1; i < patternLengths.length; i++) { + expect(patternLengths[i]).toBeLessThanOrEqual(patternLengths[i - 1]!); + } + }); + + it("each language rules are sorted 
from longest pattern to shortest", () => { + for (const lang of languageRules) { + const patternLengths = lang.map((rule) => ulen(rule[0]!)); + for (let i = 1; i < patternLengths.length; i++) { + expect(patternLengths[i]).toBeLessThanOrEqual(patternLengths[i - 1]!); + } + } + }); + }); + + describe("_checkAccentOrderMismatchWithRules", () => { + const rules = [ + ["abc", "acb", "bac", "bca", "cab", "cba", "dba", "dbc"], + ["ab", "ba"], + ]; + const langRules = { testLang: [["bc", "cb"]] }; + const allRules = [...langRules.testLang, ...rules]; + + it("returns null when neither input nor word matches a pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xyy", + "yzz", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns null when only the word matches a pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xzz", + "yab", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns null when only the input matches a pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yzz", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns no mismatch when both input and word match the same pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yab", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns input pattern when input and word match different patterns in the same rule", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yba", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "ab", patternStart: 1 }); + }); + + it("returns input pattern if there is a mismatch even if input does not have full pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xa", + "yba", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "ab", patternStart: 1 }); + }); + + 
it("returns no mismatch when word does not have full pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yb", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns no mismatch when both input and word match the same pattern (longer word)", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yabzzz", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns input pattern when input and word match different patterns in the same rule (longer word)", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "ybazzz", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "ab", patternStart: 1 }); + }); + + it("prefers rules with longer patterns", () => { + // both rules ["ab", "ba"] and ["abc", "bac"] apply here + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "ybac", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "abc", patternStart: 1 }); // the input does not have to have the full pattern + }); + + it("prefers language-specific rules even if longer common rules exist", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xbc", + "ycba", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "bc", patternStart: 1 }); // not the longer pattern "bca" + }); + + it("prefers earlier patterns if there are 2 input pattern matches in the same rule", () => { + // both "cab" and "cba" match input pattern + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xc", + "ybac", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "cab", patternStart: 1 }); + }); + + it("prefers matching with longest overlap in the same rule even if earlier-shorter-overlap-patterns match", () => { + // There are 2 [input, word] matches: ["ba", "ab"] at position 1 and ["ab", "ba"] at position 2 + const result = 
Strings.__testing._checkAccentOrderMismatchWithRules( + "xba", + "yaba", + allRules, + ); + // even though an earlier pattern "ab" exist but "ba" has longer overlap + expect(result).toStrictEqual({ inputPattern: "ba", patternStart: 1 }); + }); + + // always check patterns in the same position + it("returns null when word's pattern is after input's pattern", () => { + // pattern "ba" exists in word but in a different position from input + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yyba", + allRules, + ); + expect(result).toBeNull(); + }); + + // always check patterns in the same position + it("returns null when word's pattern is before input's pattern", () => { + // pattern "ba" exists in word but in a different position from input + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xxab", + "ybay", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns 1st input pattern match when word matches a pattern after 2 input matches", () => { + // input matches "abc" and "acb" before word matches "bac" but 1st match is returned + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xa", + "ybac", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "abc", patternStart: 1 }); + }); + + it("returns the pattern that mismatches at the same position", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xabc", + "yyyabc", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "cab", patternStart: 3 }); + }); + + it("returns null if input and word has the same language specific pattern, even if a longer common-rule mismatch exists", () => { + // there is a longer pattern mismatch ["abc", "dbc"]. 
However, in a higher priority + // rule (language-specific) input and word have the same pattern ["bc", "bc"] + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xabc", + "ydbc", + allRules, + ); + expect(result).toBeNull(); + }); + + it("respects priority when there are 2 rules: 1 mismatch and 1 with the same pattern", () => { + // the longer pattern rule has a mismatch ["cba", "dba"], so it's returned + // even though input and word have the same pattern in a lower priority rule ["ba", "ba"] + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xcba", + "ydba", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "cba", patternStart: 1 }); + }); + + it.each([ + { input: "xab", word: "ycba", expected: ["abc", 1] }, + { input: "xab", word: "yyba", expected: null }, + ])( + "returns $expected for input $input and word $word", + ({ input, word, expected }) => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + input, + word, + allRules, + ); + expect(result).toStrictEqual( + expected && { inputPattern: expected[0], patternStart: expected[1] }, + ); + }, + ); + }); + + describe("checkAccentOrderMismatch", () => { + it("returns no mismatch when both input and word match the same pattern", () => { + const result = Strings.checkAccentOrderMismatch("حطَّ", "حطَّ"); + expect(result).toBeNull(); + }); + + it("returns input pattern when input and word match different patterns in the same rule", () => { + const result = Strings.checkAccentOrderMismatch("طلُّ", "طلُّ"); + expect(result).toStrictEqual({ inputPattern: "ُّ", patternStart: 2 }); + }); + + it("returns input pattern if there is a mismatch in arabic specific 2 char rule", () => { + const result = Strings.checkAccentOrderMismatch( + "خصوصاً", + "خصوصًا", + "arabic", + ); + expect(result).toStrictEqual({ inputPattern: "اً", patternStart: 4 }); + }); + + it("returns input pattern if there is a mismatch in arabic specific 3 char rule", () => { + 
const result = Strings.checkAccentOrderMismatch("حقّاً", "حقًّا", "arabic"); + expect(result).toStrictEqual({ inputPattern: "ّاً", patternStart: 2 }); + }); + }); }); diff --git a/frontend/src/ts/input/handlers/insert-text.ts b/frontend/src/ts/input/handlers/insert-text.ts index 4b577eece08c..bfb910963a77 100644 --- a/frontend/src/ts/input/handlers/insert-text.ts +++ b/frontend/src/ts/input/handlers/insert-text.ts @@ -12,7 +12,11 @@ import { checkIfFailedDueToMinBurst, checkIfFinished, } from "../helpers/fail-or-finish"; -import { areCharactersVisuallyEqual, isSpace } from "../../utils/strings"; +import { + areCharactersVisuallyEqual, + checkAccentOrderMismatch, + isSpace, +} from "../../utils/strings"; import * as TestState from "../../test/test-state"; import * as TestLogic from "../../test/test-logic"; import { @@ -101,7 +105,7 @@ export async function onInsertText(options: OnInsertTextParams): Promise { // input and target word const testInput = TestInput.input.current; - const currentWord = TestWords.words.getCurrent(); + let currentWord = TestWords.words.getCurrent(); // if the character is visually equal, replace it with the target character // this ensures all future equivalence checks work correctly @@ -112,6 +116,23 @@ export async function onInsertText(options: OnInsertTextParams): Promise { ); const data = normalizedData ?? options.data; + // if the input is committing to a pattern that is different from target word's pattern + // and those patterns are equivalent, replace target word's pattern with input's. + // changing target word here ensures the input is considered correct, + // and actually typed characters are highlighted in `updateWordLetters()`. 
+ const pattern = checkAccentOrderMismatch( + testInput + data, + currentWord, + Config.language, + ); + if (pattern !== null) { + currentWord = + currentWord.slice(0, pattern.patternStart) + + pattern.inputPattern + + currentWord.slice(pattern.patternStart + pattern.inputPattern.length); + TestWords.words.list[TestState.activeWordIndex] = currentWord; + } + // start if needed if (!TestState.isActive) { TestLogic.startTest(now); diff --git a/frontend/src/ts/utils/strings.ts b/frontend/src/ts/utils/strings.ts index 6d134f881407..d45b42bf1735 100644 --- a/frontend/src/ts/utils/strings.ts +++ b/frontend/src/ts/utils/strings.ts @@ -319,6 +319,107 @@ export function areCharactersVisuallyEqual( return false; } +// put rules with longer patterns first +const ACCENT_RULES = [ + ["َّ", "َّ"], + ["ًّ", "ًّ"], + ["ُّ", "ُّ"], + ["ٌّ", "ٌّ"], + ["ِّ", "ِّ"], + ["ٍّ", "ٍّ"], +]; +const LANGUAGE_ACCENT_RULES: Partial> = { + // rules with longer patterns first + arabic: [ + ["ّاً", "ًّا", "ًّا"], + ["اً", "ًا"], + ], +}; + +/** + * Checks if there is a mismatch in patterns between 2 words: input and target word. + * A mismatch is when those words contain different patterns that are considered + * equivalent according to pre-determined set of rules, at the same position. + * The target word needs to have the full pattern, but the input only + * needs to end with the first part of the pattern. + * The rules have the following priority (from highest to lowest): language-specific + * rules - rules with longest pattern - rules having the longest overlap with input. + * If the input matches 2 patterns within a rule, earliest pattern is returned. 
/**
 * Reports whether `input` is committing to an accent pattern that differs from,
 * but is equivalent to, the pattern `targetWord` holds at the same position.
 *
 * Language-specific rules (when `language` has any) are checked before the
 * common rules; within each list the rules are checked in array order.
 *
 * @param input - text typed so far; only its tail has to match the beginning
 *   of a pattern
 * @param targetWord - word being typed; it has to contain a full pattern
 * @param language - optional language whose specific rules take priority
 * @returns the pattern the input is committing to and the index where it
 *   starts, or null when there is no mismatch (no equivalent patterns at that
 *   position, or the input is consistent with the target word's own pattern)
 */
export function checkAccentOrderMismatch(
  input: string,
  targetWord: string,
  language?: Language,
): { inputPattern: string; patternStart: number } | null {
  // `??` keeps the lookup to a single index access; a language without
  // specific rules simply contributes nothing.
  const langRules =
    language !== undefined ? (LANGUAGE_ACCENT_RULES[language] ?? []) : [];
  return _checkAccentOrderMismatchWithRules(input, targetWord, [
    ...langRules,
    ...ACCENT_RULES,
  ]);
}

/**
 * Rule-driven core of `checkAccentOrderMismatch`.
 *
 * Each rule is a list of equivalent patterns; the length of the rule's first
 * pattern is used as the length of every pattern in that rule. Rules are tried
 * in the order given, and for each rule the longest possible overlap between
 * the tail of `input` and the start of a pattern is tried first. The first
 * rule/overlap for which the input tail starts a pattern AND the target word
 * holds a full pattern of the same rule at that position decides the result:
 *  - null when the target word's pattern itself starts with the typed tail
 *    (the input is still consistent with the target word);
 *  - otherwise the earliest pattern of the rule that starts with the typed
 *    tail, together with the index at which it starts.
 *
 * @param input - text typed so far
 * @param targetWord - word being typed
 * @param accentRules - rules in priority order (highest first)
 * @returns the mismatch description, or null when nothing has to be replaced
 */
function _checkAccentOrderMismatchWithRules(
  input: string,
  targetWord: string,
  accentRules: string[][],
): { inputPattern: string; patternStart: number } | null {
  // An overlap can never be longer than either string.
  const maxOverlap = Math.min(input.length, targetWord.length);

  for (const rule of accentRules) {
    const patternLength = rule[0]?.length ?? 0;
    const longestOverlap = Math.min(patternLength, maxOverlap);

    for (let overlapLen = longestOverlap; overlapLen >= 1; overlapLen--) {
      const overlap = input.slice(-overlapLen);
      const patternStart = input.length - overlapLen;
      const wordSlice = targetWord.slice(
        patternStart,
        patternStart + patternLength,
      );

      // Earliest pattern the typed tail could be the beginning of.
      const inputPattern = rule.find((pattern) => pattern.startsWith(overlap));
      if (inputPattern === undefined) continue;

      // The target word must hold a complete pattern of this rule here.
      if (!rule.includes(wordSlice)) continue;

      // The typed tail is a prefix of the word's own pattern: nothing to fix.
      // Checking the prefix (not only full equality with the earliest input
      // pattern) avoids reporting a mismatch while the user is typing exactly
      // what the target word contains.
      if (wordSlice.startsWith(overlap)) return null;

      return { inputPattern, patternStart };
    }
  }

  return null;
}