From 92d6f85ce72b92a6d71c7449b166a739db2f8e6f Mon Sep 17 00:00:00 2001
From: Jinwoo Bae <bbae7050@gmail.com>
Date: Thu, 16 Apr 2026 11:45:00 -0700
Subject: [PATCH 1/3] =?UTF-8?q?Add=20Korean=20TN=20post-processing=20rules?=
 =?UTF-8?q?=20for=20particle=20agreement=20and=20month=20=E2=80=A6=20(#409?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add Korean TN post-processing rules for particle agreement and month handling

Signed-off-by: Jinwoo Bae <bbae7050@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add Korean TN fraction test cases for particle agreement

Signed-off-by: Jinwoo Bae <bbae7050@gmail.com>

* Fix Korean fraction verbalization with particle-aware handling and remove post_processing dependency

Signed-off-by: Jinwoo Bae <bbae7050@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix date and fraction normalization issues based on review feedback

Signed-off-by: Jinwoo Bae <bbae7050@gmail.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Jinwoo Bae <bbae7050@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .../text_normalization/ko/taggers/date.py     |  31 +++---
 .../text_normalization/ko/taggers/fraction.py |  19 +++-
 .../ko/verbalizers/fraction.py                | 104 +++++++++++++++++-
 .../test_cases_fraction.txt                   |  16 ++-
 4 files changed, 146 insertions(+), 24 deletions(-)

diff --git a/nemo_text_processing/text_normalization/ko/taggers/date.py b/nemo_text_processing/text_normalization/ko/taggers/date.py
index 4f2da5702..9748abc49 100644
--- a/nemo_text_processing/text_normalization/ko/taggers/date.py
+++ b/nemo_text_processing/text_normalization/ko/taggers/date.py
@@ -226,8 +226,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
                 + insert_space
                 + pynutil.insert("year: \"")
                 + (YEAR_ERA_1TO4 @ graph_cardinal)
-                + pynutil.delete("년")
-                + pynutil.insert("년")
+                + pynini.accep("년")
                 + pynutil.insert("\"")
             )
             |
@@ -235,27 +234,26 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
             (
                 pynutil.insert("year: \"")
                 + (YEAR_NO_ERA_1TO4 @ graph_cardinal)
-                + pynutil.delete("년")
-                + pynutil.insert("년")
+                + pynini.accep("년")
                 + pynutil.insert("\"")
             )
         ).optimize()
 
         individual_month_component = (
-            pynutil.insert("month: \"")
-            + month_cardinal
-            + pynutil.delete("월")
-            + pynutil.insert("월")
-            + pynutil.insert("\"")
+            pynutil.insert("month: \"") + month_cardinal + pynini.accep("월") + pynutil.insert("\"")
         )
 
-        individual_day_component = (
-            pynutil.insert("day: \"")
-            + cardinal_lz
-            + pynutil.delete("일")
-            + pynutil.insert("일")
-            + pynutil.insert("\"")
-        )
+        month_josa = pynini.union("에", "은", "는", "에는")
+
+        individual_month_component_with_josa = (
+            pynutil.insert('month: "')
+            + month_cardinal
+            + pynini.accep("월")
+            + pynini.closure(month_josa, 0, 1)
+            + pynutil.insert('"')
+        ).optimize()
+
+        individual_day_component = pynutil.insert("day: \"") + cardinal_lz + pynini.accep("일") + pynutil.insert("\"")
 
         week_full_word_acceptor = pynini.project(week, "output")
         week_component_full_word = pynutil.insert("weekday: \"") + week_full_word_acceptor + pynutil.insert("\"")
@@ -272,6 +270,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
             day_and_weekday_component
             | month_and_weekday_component
             | individual_year_component
+            | individual_month_component_with_josa
             | individual_month_component
             | individual_day_component
             | week_component
diff --git a/nemo_text_processing/text_normalization/ko/taggers/fraction.py b/nemo_text_processing/text_normalization/ko/taggers/fraction.py
index 2163f5f7f..64ea0c56e 100644
--- a/nemo_text_processing/text_normalization/ko/taggers/fraction.py
+++ b/nemo_text_processing/text_normalization/ko/taggers/fraction.py
@@ -81,6 +81,23 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
             + numerator_component
         )
 
+        # Optional particles following the fraction
+        particle_subject = pynutil.insert('morphosyntactic_features: "분의_subject"') + (
+            pynutil.delete("이") | pynutil.delete("가")
+        )
+        particle_topic = pynutil.insert('morphosyntactic_features: "분의_topic"') + (
+            pynutil.delete("은") | pynutil.delete("는")
+        )
+        particle_object = pynutil.insert('morphosyntactic_features: "분의_object"') + (
+            pynutil.delete("을") | pynutil.delete("를")
+        )
+
+        optional_particle = pynini.closure(
+            pynutil.insert(NEMO_SPACE) + (particle_subject | particle_topic | particle_object),
+            0,
+            1,
+        )
+
         # Optional minus sign
         optional_sign = (
             pynutil.insert(f'negative: {DOUBLE_QUOTE}')
@@ -90,7 +107,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
         )
 
         # Combine full graph
-        graph = pynini.closure(optional_sign, 0, 1) + (graph_fraction_slash | graph_fraction_word)
+        graph = pynini.closure(optional_sign, 0, 1) + (graph_fraction_slash | graph_fraction_word) + optional_particle
         self.graph = graph.optimize()
         final_graph = self.add_tokens(graph)
         self.fst = final_graph.optimize()
diff --git a/nemo_text_processing/text_normalization/ko/verbalizers/fraction.py b/nemo_text_processing/text_normalization/ko/verbalizers/fraction.py
index bafbf133d..472b8a86d 100644
--- a/nemo_text_processing/text_normalization/ko/verbalizers/fraction.py
+++ b/nemo_text_processing/text_normalization/ko/verbalizers/fraction.py
@@ -60,7 +60,43 @@ def __init__(self, deterministic: bool = True):
             + numerator_component
         )
 
-        # Match and delete integer_part field (e.g., "2" in "2과3분의1")
+        # Handle subject particle feature (분의_subject)
+        # Insert default particle "이" (will be corrected later via rewrite rules)
+        subject_suffix = (
+            pynutil.delete(NEMO_SPACE)
+            + pynutil.delete('morphosyntactic_features:')
+            + delete_space
+            + pynutil.delete('"분의_subject"')
+            + delete_space
+            + pynutil.insert("이")  # default form for now
+        )
+
+        # Handle topic particle feature (분의_topic)
+        topic_suffix = (
+            pynutil.delete(NEMO_SPACE)
+            + pynutil.delete('morphosyntactic_features:')
+            + delete_space
+            + pynutil.delete('"분의_topic"')
+            + delete_space
+            + pynutil.insert("은")
+        )
+
+        # Handle object particle feature (분의_object)
+        object_suffix = (
+            pynutil.delete(NEMO_SPACE)
+            + pynutil.delete('morphosyntactic_features:')
+            + delete_space
+            + pynutil.delete('"분의_object"')
+            + delete_space
+            + pynutil.insert("을")
+        )
+
+        # Combine fraction + optional particle suffix
+        # Particle is always inserted first in default form and later corrected
+        graph_fraction_all = graph_fraction + pynini.closure(subject_suffix | topic_suffix | object_suffix, 0, 1)
+
+        # Handle integer + fraction (e.g., "2과 3/4")
+        # integer_part is removed and replaced with proper spacing
         graph_integer = (
             pynutil.delete('integer_part:')
             + delete_space
@@ -69,9 +105,10 @@ def __init__(self, deterministic: bool = True):
             + pynutil.delete('"')
             + pynutil.insert(NEMO_SPACE)
         )
-        graph_integer_fraction = graph_integer + delete_space + graph_fraction
+        # Combine integer part with fraction
+        graph_integer_fraction = graph_integer + delete_space + graph_fraction_all
 
-        # Match and delete optional negative field (e.g., "마이너스")
+        # Handle optional negative prefix (e.g., "마이너스")
         optional_sign = (
             pynutil.delete('negative:')
             + delete_space
@@ -82,9 +119,64 @@ def __init__(self, deterministic: bool = True):
             + pynutil.insert(NEMO_SPACE)
         )
 
-        # Final graph handles optional negative + (integer + fraction | fraction only)
-        graph = pynini.closure(optional_sign, 0, 1) + (graph_integer_fraction | graph_fraction)
+        # Final structure:
+        # [optional negative] + (integer + fraction OR fraction only)
+        graph = pynini.closure(optional_sign, 0, 1) + (graph_integer_fraction | graph_fraction_all)
 
-        # Final optimized verbalizer FST
+        # Remove token wrappers
         final_graph = self.delete_tokens(graph)
+
+        # Sigma-star for cdrewrite: closure over NEMO_NOT_QUOTE | NEMO_SPACE; contexts are empty, so rules apply anywhere
+        sigma = pynini.closure(NEMO_NOT_QUOTE | NEMO_SPACE)
+
+        # Fix subject particle agreement (이 → 가 for vowel-ending numerals)
+        # e.g., 사이 → 사가, 구이 → 구가
+        subject_rewrite = pynini.cdrewrite(
+            pynini.string_map(
+                [
+                    ("이이", "이가"),
+                    ("사이", "사가"),
+                    ("오이", "오가"),
+                    ("구이", "구가"),
+                ]
+            ),
+            "",
+            "",
+            sigma,
+        )
+
+        # Fix topic particle agreement (은 → 는)
+        # e.g., 이은 → 이는, 사은 → 사는
+        topic_rewrite = pynini.cdrewrite(
+            pynini.string_map(
+                [
+                    ("이은", "이는"),
+                    ("사은", "사는"),
+                    ("오은", "오는"),
+                    ("구은", "구는"),
+                ]
+            ),
+            "",
+            "",
+            sigma,
+        )
+
+        # Fix object particle agreement (을 → 를)
+        # e.g., 오을 → 오를, 이을 → 이를
+        object_rewrite = pynini.cdrewrite(
+            pynini.string_map(
+                [
+                    ("이을", "이를"),
+                    ("사을", "사를"),
+                    ("오을", "오를"),
+                    ("구을", "구를"),
+                ]
+            ),
+            "",
+            "",
+            sigma,
+        )
+
+        # Apply the rewrite rules in sequence, then optimize the final FST
+        final_graph = final_graph @ subject_rewrite @ topic_rewrite @ object_rewrite
         self.fst = final_graph.optimize()
diff --git a/tests/nemo_text_processing/ko/data_text_normalization/test_cases_fraction.txt b/tests/nemo_text_processing/ko/data_text_normalization/test_cases_fraction.txt
index a183be59b..65e5049b8 100644
--- a/tests/nemo_text_processing/ko/data_text_normalization/test_cases_fraction.txt
+++ b/tests/nemo_text_processing/ko/data_text_normalization/test_cases_fraction.txt
@@ -11,4 +11,18 @@
 1과1/3~일과 삼분의 일
 1과√1/4~일과 사분의 루트 일
 3분의1~삼분의 일
-121분의3221~백이십일분의 삼천이백이십일
\ No newline at end of file
+121분의3221~백이십일분의 삼천이백이십일
+이번 경기의 3/5이 중요하다~이번 경기의 오분의 삼이 중요하다
+전체 구역의 4/7이 통제되었다~전체 구역의 칠분의 사가 통제되었다
+설문 응답자의 9/10이 찬성했다~설문 응답자의 십분의 구가 찬성했다
+그 중 2/3은 성공했다~그 중 삼분의 이는 성공했다
+참가자의 5/8이 탈락했다~참가자의 팔분의 오가 탈락했다
+참가자의 6/7 이 통과했다~참가자의 칠분의 육 이 통과했다
+전체의 3/4 이 감소했다~전체의 사분의 삼 이 감소했다
+응답자의 2/5이 반대했다~응답자의 오분의 이가 반대했다
+학생의 7/9 이 합격했다~학생의 구분의 칠 이 합격했다
+전체의 1/2 이 남았다~전체의 이분의 일 이 남았다
+그 중 4/5이 성공했다~그 중 오분의 사가 성공했다
+전체의 5/6이 완료되었다~전체의 육분의 오가 완료되었다
+참가자의 3/8이 탈락했다~참가자의 팔분의 삼이 탈락했다
+응답자의 6/10 이 동의했다~응답자의 십분의 육 이 동의했다
\ No newline at end of file

From d35c205150eadc8643a40a06eedc576499c20c17 Mon Sep 17 00:00:00 2001
From: Mariana <47233618+mgrafu@users.noreply.github.com>
Date: Thu, 23 Apr 2026 11:07:54 -0400
Subject: [PATCH 2/3] Jenkins fix (#419)

Signed-off-by: Mariana Graterol Fuenmayor <marianag@nvidia.com>
---
 Jenkinsfile | 231 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 155 insertions(+), 76 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 1219aae54..34b25bbbe 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,9 +1,9 @@
 pipeline {
   agent {
-    docker {
-      image 'tnitn_ci_py310:24.07'
-      args '-v /mnt/jenkins/jenkinsci/TestData:/home/jenkins/TestData -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
-    }
+        docker {
+          image 'tnitn_ci_py310:24.07'
+          args '-v /mnt/jenkins/jenkinsci/TestData:/home/jenkins/TestData -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
+        }
   }
   options {
     timeout(time: 2, unit: 'HOURS')
@@ -28,11 +28,12 @@ pipeline {
     MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
     HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/02-18-26-0'
-    KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-03-25-0'
+    KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-22-26-0'
     DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {
 
+
     stage('PyTorch version') {
       steps {
         sh 'python -c "import torch; print(torch.__version__)"'
@@ -46,6 +47,7 @@ pipeline {
       }
     }
 
+
     stage('L0: Create EN TN/ITN Grammars') {
       when {
         anyOf {
@@ -53,6 +55,7 @@ pipeline {
           branch 'staging/**'
           branch 'staging_*'
           changeRequest target: 'main'
+
         }
       }
       failFast true
@@ -77,10 +80,35 @@ pipeline {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language en --text="twenty" --cache_dir ${EN_TN_CACHE}'
           }
         }
+
+      }
+    }
+    stage('L0: Create HI TN/ITN Grammars') {
+    when {
+        anyOf {
+            branch 'main'
+            branch 'staging/**'
+            branch 'staging_*'
+            changeRequest target: 'main'
+        }
+    }
+    failFast true
+    parallel {
+        stage('L0: Hi TN grammars') {
+            steps {
+                sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hi --text="१" --cache_dir ${HI_TN_CACHE}'
+            }
+        }
+        stage('L0: Hi ITN grammars') {
+            steps {
+                sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi --text="एक" --cache_dir ${HI_TN_CACHE}'
+            }
+        }
+        
       }
     }
 
-    stage('L0: Create DE/ES/FR TN/ITN Grammars') {
+    stage('L0: Create DE/ES TN/ITN Grammars') {
       when {
         anyOf {
           branch 'main'
@@ -93,12 +121,12 @@ pipeline {
       parallel {
         stage('L0: DE TN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=de --text="1" --cache_dir ${DEFAULT_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=de --text="1" --cache_dir ${DE_TN_CACHE}'
           }
         }
         stage('L0: DE ITN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=de --text="ein hundert " --cache_dir ${DEFAULT_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=de --text="ein hundert " --cache_dir ${DE_TN_CACHE}'
           }
         }
         stage('L0: ES TN grammars') {
@@ -116,24 +144,38 @@ pipeline {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=es_en --text="ciento uno " --cache_dir ${ES_EN_TN_CACHE}'
           }
         }
-        stage('L0: FR TN grammars') {
+      }
+    }
+
+    stage('L0: Create AR TN/ITN Grammars') {
+      when {
+        anyOf {
+          branch 'main' 
+          branch 'staging/**'
+          branch 'staging_*'
+          changeRequest target: 'main'
+        }
+      }
+      failFast true
+      parallel {
+        stage('L0: AR TN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=fr --text="2" --cache_dir ${FR_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ar --text="2" --cache_dir ${AR_TN_CACHE}'
           }
         }
-        stage('L0: FR ITN grammars') {
+        stage('L0: AR ITN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=fr --text="cent " --cache_dir ${FR_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ar --text="اثنان " --cache_dir ${AR_TN_CACHE}'
           }
         }
+
       }
     }
 
-
-    stage('L0: Create HI/VI/RU TN/ITN') {
+    stage('L0: Create FR TN/ITN & VI TN/ITN & HU TN & IT TN') {
       when {
         anyOf {
-          branch 'main'
+          branch 'main' 
           branch 'staging/**'
           branch 'staging_*'
           changeRequest target: 'main'
@@ -141,43 +183,43 @@ pipeline {
       }
       failFast true
       parallel {
-        stage('L0: VI ITN grammars') {
+        stage('L0: FR TN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=fr --text="2" --cache_dir ${FR_TN_CACHE}'
           }
         }
-        stage('L0: VI TN grammars') {
+        stage('L0: FR ITN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=vi --text="100" --cache_dir ${VI_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=fr --text="cent " --cache_dir ${FR_TN_CACHE}'
           }
         }
-        stage('L0: RU TN grammars') {
+        stage('L0: VI ITN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --lang=ru --text="03" --cache_dir ${RU_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
           }
         }
-        stage('L0: RU ITN grammars') {
+        stage('L0: VI TN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ru --text="три " --cache_dir ${RU_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=vi --text="100" --cache_dir ${VI_TN_CACHE}'
           }
         }
-        stage('L0: Hi TN grammars') {
-          steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hi --text="१" --cache_dir ${HI_TN_CACHE}'
+        stage('L0: HU TN grammars') {
+         steps {
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}'
           }
         }
-        stage('L0: Hi ITN grammars') {
+        stage('L0: IT TN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi --text="एक" --cache_dir ${HI_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=it --text="122" --cache_dir ${IT_TN_CACHE}'
           }
         }
       }
     }
 
-    stage('L0: Create AR/HU/SV/PT/IT TN/ITN Grammars') {
+    stage('L0: Create RU TN/ITN Grammars & SV & PT') {
       when {
         anyOf {
-          branch 'main'
+          branch 'main' 
           branch 'staging/**'
           branch 'staging_*'
           changeRequest target: 'main'
@@ -185,53 +227,60 @@ pipeline {
       }
       failFast true
       parallel {
-        stage('L0: SV TN grammars') {
-          steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=sv --text="100" --cache_dir ${SV_TN_CACHE}'
-          }
-        }
-        stage('L0: HU TN grammars') {
+        stage('L0: RU TN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --lang=ru --text="03" --cache_dir ${RU_TN_CACHE}'
           }
         }
-        stage('L0: AR TN grammars') {
+        stage('L0: RU ITN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ar --text="2" --cache_dir ${AR_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ru --text="три " --cache_dir ${RU_TN_CACHE}'
           }
         }
-        stage('L0: AR ITN grammars') {
-          steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ar --text="اثنان " --cache_dir ${AR_TN_CACHE}'
+        stage('L0: SV TN grammars') {
+         steps {
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=sv --text="100" --cache_dir ${SV_TN_CACHE}'
           }
         }
-        // stage('L0: SV ITN grammars') {
-        //   steps {
-        //     sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=sv --text="hundra " --cache_dir ${SV_TN_CACHE}'
-        //   }
-        // }
-        // stage('L0: PT TN grammars') {
-        //   steps {
-        //     sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=pt --text="2" --cache_dir ${DEFAULT_TN_CACHE}'
-        //   }
-        // }
+      //  stage('L0: SV ITN grammars') {
+      //    steps {
+      //      sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=sv --text="hundra " --cache_dir ${SV_TN_CACHE}'
+      //    }
+      //  }
+      // stage('L0: PT TN grammars') {
+       //  steps {
+       //     sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=pt --text="2" --cache_dir ${DEFAULT_TN_CACHE}'
+       //   }
+       // }
         stage('L0: PT ITN grammars') {
           steps {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=pt --text="dez " --cache_dir ${PT_TN_CACHE}'
           }
         }
-        stage('L0: IT TN grammars') {
+      }
+    }
+    stage('L0: Create HE ITN Grammar') {
+      when {
+        anyOf {
+          branch 'main' 
+          branch 'staging/**'
+          branch 'staging_*'
+          changeRequest target: 'main'
+        }
+      }
+      failFast true
+      parallel {
+        stage('L0: HE ITN grammars') {
           steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=it --text="122" --cache_dir ${IT_TN_CACHE}'
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=he --text="ת " --cache_dir ${HE_TN_CACHE}'
           }
         }
       }
     }
-
-    stage('L0: Create MR/HE/HY TN/ITN Grammars') {
+    stage('L0: Create HY TN/ITN Grammars & MR') {
       when {
         anyOf {
-          branch 'main'
+          branch 'main' 
           branch 'staging/**'
           branch 'staging_*'
           changeRequest target: 'main'
@@ -254,18 +303,12 @@ pipeline {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}'
           }
         }
-        stage('L0: HE ITN grammars') {
-          steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=he --text="ת " --cache_dir ${HE_TN_CACHE}'
-          }
-        }
       }
     }
-
-    stage('L0: Create CJK TN/ITN Grammar') {
+    stage('L0: Create ZH TN/ITN Grammar') {
       when {
         anyOf {
-          branch 'main'
+          branch 'main' 
           branch 'staging/**'
           branch 'staging_*'
           changeRequest target: 'main'
@@ -283,30 +326,57 @@ pipeline {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=zh --text="6" --cache_dir ${ZH_TN_CACHE}'
           }
         }
+      }
+    }
+    stage('L0: Create JA ITN Grammars') {
+      when {
+        anyOf {
+          branch 'main' 
+          branch 'staging/**'
+          branch 'staging_*'
+          changeRequest target: 'main'
+        }
+      }
+      failFast true
+      parallel {
         stage('L0: JA ITN grammars') {
           steps {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ja --text="100" --cache_dir ${JA_TN_CACHE}'
           }
         }
-        stage('L0: KO TN grammars') {
-          steps {
-            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ko --text="100" --cache_dir ${KO_TN_CACHE}'
-          }
+      }
+    }
+    stage('L0: Create KO TN/ITN Grammars') {
+      when {
+        anyOf {
+          branch 'main' 
+          branch 'staging/**'
+          branch 'staging_*'
+          changeRequest target: 'main'
         }
+      }
+      failFast true
+      parallel {
         stage('L0: KO ITN grammars') {
           steps {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ko --text="백" --cache_dir ${KO_TN_CACHE}'
           }
         }
+        stage('L0: KO TN grammars') {
+          steps {
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ko --text="100" --cache_dir ${KO_TN_CACHE}'
+          }
+        }
       }
     }
 
-    // L1 Tests starts here
+
+// L1 tests start here
 
     stage('L1: TN/ITN Tests CPU') {
       when {
         anyOf {
-          branch 'main'
+          branch 'main' 
           branch 'staging/**'
           branch 'staging_*'
           changeRequest target: 'main'
@@ -378,7 +448,7 @@ pipeline {
           steps {
             sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ja/ -m "not pleasefixme" --cpu --tn_cache_dir ${JA_TN_CACHE}'
           }
-        }
+        }        
         stage('L1: Run all MR ITN tests (restore grammars from cache)') {
           steps {
             sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/mr/ -m "not pleasefixme" --cpu --tn_cache_dir ${MR_TN_CACHE}'
@@ -402,10 +472,10 @@ pipeline {
       }
     }
 
-    stage('L2: EN Sparrowhawk Tests') {
+     stage('L2: EN Sparrowhawk Tests') {
       when {
         anyOf {
-          branch 'main'
+          branch 'main' 
           branch 'staging/**'
           branch 'staging_*'
           changeRequest target: 'main'
@@ -417,12 +487,14 @@ pipeline {
           steps {
             sh 'CUDA_VISIBLE_DEVICES=""  cp -r /workspace/sparrowhawk/documentation/grammars /workspace/sparrowhawk/documentation/grammars_en_itn_grammars_lower_cased && cd tools/text_processing_deployment && bash sh_test.sh --MODE="test_itn_grammars" --OVERWRITE_CACHE=False --FAR_PATH=${EN_TN_CACHE}/SH_ITN --LANGUAGE="en"'
             sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_inverse_text_normalization.sh /workspace/sparrowhawk/documentation/grammars_en_itn_grammars_lower_cased `pwd`'
+
           }
         }
         stage('L2: EN ITN Run Sparrowhawk test - Cased Input') {
           steps {
             sh 'CUDA_VISIBLE_DEVICES=""  cp -r /workspace/sparrowhawk/documentation/grammars /workspace/sparrowhawk/documentation/grammars_en_itn_grammars_cased && cd tools/text_processing_deployment && bash sh_test.sh --MODE="test_itn_grammars" --INPUT_CASE="cased" --OVERWRITE_CACHE=False --FAR_PATH=${EN_TN_CACHE}/SH_ITN_cased --LANGUAGE="en"'
             sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_inverse_text_normalization_cased.sh /workspace/sparrowhawk/documentation/grammars_en_itn_grammars_cased `pwd`'
+
           }
         }
         stage('L2: EN TN Run Sparrowhawk test') {
@@ -431,13 +503,14 @@ pipeline {
             sh 'CUDA_VISIBLE_DEVICES="" cd tests/nemo_text_processing/en && bash test_sparrowhawk_normalization.sh /workspace/sparrowhawk/documentation/grammars_en_tn_grammars_cased `pwd`'
           }
         }
+
       }
     }
-
+    
     stage('L2: NeMo text processing') {
       when {
         anyOf {
-          branch 'main'
+          branch 'main' 
           branch 'staging/**'
           branch 'staging_*'
           changeRequest target: 'main'
@@ -456,6 +529,7 @@ pipeline {
             rm -rf $NORM_OUTPUT_DIR'
           }
         }
+
         stage('L2: Eng ITN export') {
           steps {
             sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkins/TestData/text_denorm/output_${TIME} && \
@@ -466,6 +540,8 @@ pipeline {
             rm -rf $DENORM_OUTPUT_DIR'
           }
         }
+
+
         stage('L2: Eng alignment TN') {
           steps {
             sh 'TIME=`date +"%Y-%m-%d-%T"` && NORM_OUTPUT_DIR=/home/jenkins/TestData/text_norm/output_${TIME} && mkdir $NORM_OUTPUT_DIR && \
@@ -474,6 +550,7 @@ pipeline {
             rm -rf $NORM_OUTPUT_DIR'
           }
         }
+
         stage('L2: Eng alignment ITN') {
           steps {
             sh 'TIME=`date +"%Y-%m-%d-%T"` && DENORM_OUTPUT_DIR=/home/jenkins/TestData/text_denorm/output_${TIME} && mkdir $DENORM_OUTPUT_DIR && \
@@ -482,10 +559,12 @@ pipeline {
             rm -rf $DENORM_OUTPUT_DIR'
           }
         }
+
       }
     }
   }
 
+
   post {
     always {
       sh 'chmod -R 777 .'

From 114b3db85aa78ef8453ad7336eaee2117d507942 Mon Sep 17 00:00:00 2001
From: Mariana Graterol Fuenmayor <marianag@nvidia.com>
Date: Thu, 23 Apr 2026 08:17:00 -0700
Subject: [PATCH 3/3] update jenkins cache

Signed-off-by: Mariana Graterol Fuenmayor <marianag@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 34b25bbbe..8f36c1fb2 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -28,7 +28,7 @@ pipeline {
     MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
     HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/02-18-26-0'
-    KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-22-26-0'
+    KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-23-26-0'
     DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {