+*Note: Qwen3-TTS loads its model with `device="cuda"`, so a CUDA-capable GPU is required (there is no CPU fallback), and it will install `torch` as a dependency.*
python cli.py input.txt --output-dir ./output_audio ``` +### Using Edge TTS (Free, High Quality) + +```bash +python cli.py input.txt --engine edge-tts --voice en-US-ChristopherNeural +``` + +### Using Qwen3-TTS (Local Voice Cloning) + +```bash +# Voice Cloning Mode +python cli.py input.txt --engine qwen3-tts --mode clone --ref-audio sample.wav --ref-text "Text from sample audio." + +# Voice Design Mode +python cli.py input.txt --engine qwen3-tts --mode design --instruct "A calm, deep male voice with a slight British accent." +``` + ### Using Kokoro Engine with Custom Voice ```bash @@ -72,6 +90,24 @@ Do you want to proceed? (y/N): y | `--chunk-size` | Maximum characters per chunk | 4000 | | `--max-workers` | Number of parallel workers | 4 | +### Edge TTS Engine Options + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `--voice` | Voice to use (e.g., `en-US-ChristopherNeural`, `en-GB-SoniaNeural`) | `en-US-ChristopherNeural` | + +### Qwen3-TTS Engine Options + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `--mode` | Generation mode: `clone`, `design`, or `custom` | `clone` | +| `--model` | HuggingFace model ID | `Qwen/Qwen3-TTS-12Hz-1.7B-Base` | +| `--language` | Language for synthesis | `English` | +| `--ref-audio` | Path to reference audio for voice cloning | `""` | +| `--ref-text` | Transcript of reference audio for voice cloning | `""` | +| `--instruct` | Voice design instruction (design mode) | `Warm, clear narrator voice.` | +| `--speaker` | Speaker ID (custom mode) | `aiden` | + ### Kokoro Engine Options | Parameter | Description | Default | @@ -94,10 +130,12 @@ Available OpenAI voices: `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova ## Full Usage ```bash -usage: cli.py [-h] [--engine {kokoro,openai}] [--output-dir OUTPUT_DIR] +usage: cli.py [-h] [--engine {kokoro,openai,edge-tts,qwen3-tts}] [--output-dir OUTPUT_DIR] [--chunk-size CHUNK_SIZE] [--max-workers MAX_WORKERS] [--lang-code 
LANG_CODE] [--speed SPEED] [--voice VOICE] - [--model MODEL] [--response-format RESPONSE_FORMAT] + [--model MODEL] [--response-format RESPONSE_FORMAT] + [--mode MODE] [--language LANGUAGE] [--ref-audio REF_AUDIO] + [--ref-text REF_TEXT] [--instruct INSTRUCT] [--speaker SPEAKER] input_file ``` @@ -119,4 +157,5 @@ The project is designed to be easily extensible. To add a new TTS engine: - NLTK for text chunking - SoundFile for audio processing - Pydantic for configuration management -- OpenAI and Kokoro SDKs for respective engines +- OpenAI, Kokoro, Edge TTS, and Faster-Qwen3-TTS SDKs +- PyTorch (for Qwen3-TTS) diff --git a/requirements.txt b/requirements.txt index 3c513d7..f375457 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ pydantic==2.10.6 soundfile==0.13.1 openai==1.61.1 edge-tts==6.1.9 +faster-qwen3-tts diff --git a/tts_engine/__init__.py b/tts_engine/__init__.py index b90e70f..4cbb7b0 100644 --- a/tts_engine/__init__.py +++ b/tts_engine/__init__.py @@ -1,3 +1,4 @@ from .kokoro import KokoroEngine from .openai import OpenAIEngine from .edgetts import EdgeTTSEngine +from .qwen3tts import Qwen3TTSEngine diff --git a/tts_engine/config.py b/tts_engine/config.py index 9675fe2..5b5dda3 100644 --- a/tts_engine/config.py +++ b/tts_engine/config.py @@ -47,6 +47,18 @@ class EdgeTTSConfig(TTSEngineConfig): cost_per_char: float = Field(default=0.0, description="Cost per character in USD") +class Qwen3TTSConfig(TTSEngineConfig): + engine_name: Literal["qwen3-tts"] = "qwen3-tts" + mode: str = Field(default="clone", description="Generation mode: clone, design, or custom") + model: str = Field(default="Qwen/Qwen3-TTS-12Hz-1.7B-Base", description="HuggingFace model ID") + language: str = Field(default="English", description="Language for synthesis") + ref_audio: str = Field(default="", description="Path to reference audio for voice cloning") + ref_text: str = Field(default="", description="Transcript of reference audio for voice cloning") + instruct: str 
= Field(default="Warm, clear narrator voice.", description="Voice design instruction (design mode)") + speaker: str = Field(default="aiden", description="Speaker ID (custom mode)") + cost_per_char: float = Field(default=0.0, description="Cost per character in USD") + + class TTSConfig(BaseConfig): """Main configuration supporting multiple engines""" diff --git a/tts_engine/qwen3tts.py b/tts_engine/qwen3tts.py new file mode 100644 index 0000000..ffa9475 --- /dev/null +++ b/tts_engine/qwen3tts.py @@ -0,0 +1,55 @@ +# tts_engine/qwen3tts.py +from pathlib import Path +from .base import TTSEngine, SynthesisResult +from tts_engine.config import Qwen3TTSConfig +from utils.file_manager import FileManager + + +class Qwen3TTSEngine(TTSEngine): + def __init__(self, config: Qwen3TTSConfig): + super().__init__(config) + + import torch + from faster_qwen3_tts import FasterQwen3TTS + + print(f"Loading Qwen3-TTS model: {config.model} (mode: {config.mode})") + self.model = FasterQwen3TTS.from_pretrained(config.model, device="cuda", dtype=torch.float16) + + def synthesize(self, text: str, output_path: Path, chunk_index: int = 1) -> SynthesisResult: + print(f"Processing text chunk {chunk_index} ({len(text)} characters)") + try: + mode = self.config.mode + if mode == "clone": + if not self.config.ref_audio or not self.config.ref_text: # empty string = not provided + raise ValueError("Clone mode requires --ref-audio and --ref-text") + wavs, sr = self.model.generate_voice_clone( + text=text, + language=self.config.language, + ref_audio=self.config.ref_audio, + ref_text=self.config.ref_text, + ) + elif mode == "design": + wavs, sr = self.model.generate_voice_design( + text=text, + instruct=self.config.instruct, + language=self.config.language, + ) + elif mode == "custom": + wavs, sr = self.model.generate_custom_voice( + text=text, + speaker=self.config.speaker, + language=self.config.language, + ) + else: + raise ValueError(f"Unknown mode: {mode}. 
Use clone, design, or custom.") + + FileManager.safe_write_audio(output_path, wavs[0], sr) + print(f" Saved chunk {chunk_index} to {output_path}") + + return SynthesisResult( + output_file=output_path, + character_count=len(text), + ) + except Exception as e: + print(f"Synthesis failed: {str(e)}") + raise diff --git a/tts_engine/registry.py b/tts_engine/registry.py index a318f62..1202edc 100644 --- a/tts_engine/registry.py +++ b/tts_engine/registry.py @@ -2,7 +2,8 @@ from .kokoro import KokoroEngine from .openai import OpenAIEngine from .edgetts import EdgeTTSEngine -from .config import KokoroConfig, OpenAIConfig, EdgeTTSConfig +from .qwen3tts import Qwen3TTSEngine +from .config import KokoroConfig, OpenAIConfig, EdgeTTSConfig, Qwen3TTSConfig TTS_REGISTRY = { "kokoro": { @@ -17,4 +18,8 @@ "engine": EdgeTTSEngine, "config": EdgeTTSConfig, }, + "qwen3-tts": { + "engine": Qwen3TTSEngine, + "config": Qwen3TTSConfig, + }, } From f0a72374ed60b10183a46b768e8d19fdd7a3bf3b Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Mon, 2 Mar 2026 12:27:11 -0500 Subject: [PATCH 2/5] feat: implement engine-specific chunk sizes - Move chunk_size from global TTSConfig to engine-specific TTSEngineConfig - Set Qwen3-TTS default chunk size to 2000 for better stability - Update cli.py to use engine-specific chunk_size during processing - Update README.md tables to reflect per-engine chunking defaults --- README.md | 12 ++++++++---- cli.py | 4 ++-- tts_engine/config.py | 3 ++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index cb03f78..5cc37f3 100644 --- a/README.md +++ b/README.md @@ -87,19 +87,21 @@ Do you want to proceed? 
(y/N): y | Parameter | Description | Default | |-----------|-------------|---------| | `--output-dir` | Output directory for audio files | `output` | -| `--chunk-size` | Maximum characters per chunk | 4000 | | `--max-workers` | Number of parallel workers | 4 | +| `--chunk-size` | Maximum characters per chunk | Engine-dependent (see below) | ### Edge TTS Engine Options | Parameter | Description | Default | |-----------|-------------|---------| | `--voice` | Voice to use (e.g., `en-US-ChristopherNeural`, `en-GB-SoniaNeural`) | `en-US-ChristopherNeural` | +| `--chunk-size` | Maximum characters per chunk | 4000 | ### Qwen3-TTS Engine Options | Parameter | Description | Default | |-----------|-------------|---------| +| `--chunk-size` | Maximum characters per chunk | 2000 | | `--mode` | Generation mode: `clone`, `design`, or `custom` | `clone` | | `--model` | HuggingFace model ID | `Qwen/Qwen3-TTS-12Hz-1.7B-Base` | | `--language` | Language for synthesis | `English` | @@ -112,6 +114,7 @@ Do you want to proceed? (y/N): y | Parameter | Description | Default | |-----------|-------------|---------| +| `--chunk-size` | Maximum characters per chunk | 4000 | | `--lang-code` | Language code for synthesis | "a" | | `--speed` | Speech speed multiplier | 1.0 | | `--voice` | Voice to use `af_bella`, `af_nicole`, `af_sarah`, `af_sky`, `bf_emma`, `bf_isabella`, `am_adam`, `am_michael`, `bm_george`, `bm_lewis`| "am_michael" | @@ -121,6 +124,7 @@ Do you want to proceed? 
(y/N): y | Parameter | Description | Default | |-----------|-------------|---------| +| `--chunk-size` | Maximum characters per chunk | 4000 | | `--model` | OpenAI TTS model | "tts-1-hd" | | `--voice` | Voice to use: `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`| "alloy" | | `--response-format` | Audio format for output | "wav" | @@ -130,9 +134,9 @@ Available OpenAI voices: `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova ## Full Usage ```bash -usage: cli.py [-h] [--engine {kokoro,openai,edge-tts,qwen3-tts}] [--output-dir OUTPUT_DIR] - [--chunk-size CHUNK_SIZE] [--max-workers MAX_WORKERS] - [--lang-code LANG_CODE] [--speed SPEED] [--voice VOICE] +usage: cli.py [-h] [--engine {kokoro,openai,edge-tts,qwen3-tts}] [--output-dir OUTPUT_DIR] + [--max-workers MAX_WORKERS] [--chunk-size CHUNK_SIZE] + [--lang-code LANG_CODE] [--speed SPEED] [--voice VOICE] [--model MODEL] [--response-format RESPONSE_FORMAT] [--mode MODE] [--language LANGUAGE] [--ref-audio REF_AUDIO] [--ref-text REF_TEXT] [--instruct INSTRUCT] [--speaker SPEAKER] diff --git a/cli.py b/cli.py index e509568..6300991 100644 --- a/cli.py +++ b/cli.py @@ -83,7 +83,7 @@ def main(): with open(args.input_file) as f: input_text = f.read() - total_chars = calculate_total_characters(input_text, config.chunk_size) + total_chars = calculate_total_characters(input_text, config.engine_config.chunk_size) # Calculate and confirm costs if necessary total_cost = calculate_cost(total_chars, config.engine_config.cost_per_char) @@ -96,7 +96,7 @@ def main(): engine = TTS_REGISTRY[args.engine]["engine"](config.engine_config) # Process text - chunker = TextChunker(config.chunk_size) + chunker = TextChunker(config.engine_config.chunk_size) chunks = chunker.process(input_text) # Create output directory diff --git a/tts_engine/config.py b/tts_engine/config.py index 5b5dda3..31aed29 100644 --- a/tts_engine/config.py +++ b/tts_engine/config.py @@ -21,6 +21,7 @@ class TTSEngineConfig(BaseConfig): 
Subject: [PATCH 3/5] feat: add tqdm progress bar and clean up engine logging

NOTE(review): as currently split, this commit leaves cli.py broken until
PATCH 4/5 applies — it introduces an empty `parser.add_argument()` call and a
bodyless `if args.thread_benchmark:` block, and `main()` is only restored in
the next commit. Consider squashing with PATCH 4/5 to keep the series
bisectable.
create_arg_parser(): help=f"({engines_str}) {info['description']}", ) - return parser - - -def main(): - parser = create_arg_parser() - args = parser.parse_args() - - # Convert args to dict, only including non-None values - cli_args = {k: v for k, v in vars(args).items() if v is not None} - - # Create config using factory method - config = TTSConfig.create(args.engine, cli_args) - - # Read input text and calculate total characters - with open(args.input_file) as f: - input_text = f.read() + # Add benchmark flag + parser.add_argument( + ) - total_chars = calculate_total_characters(input_text, config.engine_config.chunk_size) + return parser - # Calculate and confirm costs if necessary - total_cost = calculate_cost(total_chars, config.engine_config.cost_per_char) - if not get_user_confirmation(total_cost): - print("Operation cancelled by user.") - return # Create engine instance from registry engine = TTS_REGISTRY[args.engine]["engine"](config.engine_config) @@ -99,6 +82,9 @@ def main(): chunker = TextChunker(config.engine_config.chunk_size) chunks = chunker.process(input_text) + # Run thread benchmark if requested + if args.thread_benchmark: + # Create output directory FileManager.create_output_dir(config.output_dir) @@ -114,9 +100,10 @@ def main(): for i, chunk in enumerate(chunks) ] - for future in concurrent.futures.as_completed(futures): - result = future.result() - print(f"Generated: {result.output_file}") + with tqdm(total=len(chunks), desc="Synthesizing", unit="chunk") as pbar: + for future in as_completed(futures): + future.result() + pbar.update(1) if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index f375457..70f75e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ soundfile==0.13.1 openai==1.61.1 edge-tts==6.1.9 faster-qwen3-tts +tqdm diff --git a/tts_engine/edgetts.py b/tts_engine/edgetts.py index cae754c..d258e12 100644 --- a/tts_engine/edgetts.py +++ b/tts_engine/edgetts.py @@ -24,12 +24,8 @@ def 
synthesize( self, text: str, output_path: Path, chunk_index: int = 1 ) -> SynthesisResult: """Synthesize text to speech using Edge TTS""" - print(f"📦 Processing text chunk {chunk_index} ({len(text)} characters)") - try: - print(f" 🔊 Generating audio with Edge TTS...") asyncio.run(self._synthesize_async(text, output_path)) - print(f" 💾 Saved audio to {output_path}") return SynthesisResult( output_file=output_path, diff --git a/tts_engine/kokoro.py b/tts_engine/kokoro.py index 78c75fd..0e7c40a 100644 --- a/tts_engine/kokoro.py +++ b/tts_engine/kokoro.py @@ -18,7 +18,6 @@ def __init__(self, config: KokoroConfig): def synthesize( self, text: str, output_path: Path, chunk_index: int = 1 ) -> SynthesisResult: - print(f"📦 Processing text chunk {chunk_index} ({len(text)} characters)") try: generator = self.pipeline( text, @@ -28,7 +27,6 @@ def synthesize( ) results = [] for segment_number, (_, _, audio) in enumerate(generator, start=1): - print(f" 🔊 Audio segment {chunk_index}.{segment_number}") filename = output_path.with_name( f"{output_path.stem}_s{segment_number:03d}.wav" ) diff --git a/tts_engine/openai.py b/tts_engine/openai.py index 2e34653..4348366 100644 --- a/tts_engine/openai.py +++ b/tts_engine/openai.py @@ -2,7 +2,6 @@ from tts_engine.config import OpenAIConfig from pathlib import Path from openai import OpenAI -import time class OpenAIEngine(TTSEngine): @@ -13,12 +12,7 @@ def __init__(self, config: OpenAIConfig): def synthesize( self, text: str, output_path: Path, chunk_index: int = 1 ) -> SynthesisResult: - print(f"📦 Processing text chunk {chunk_index} ({len(text)} characters)") - try: - print(f" 🔊 Requesting audio from OpenAI API...") - start_time = time.time() - response = self.client.audio.speech.create( model=self.config.model, voice=self.config.voice, @@ -26,12 +20,8 @@ def synthesize( response_format=self.config.response_format, ) - print(f" 💾 Saving audio to {output_path}...") response.stream_to_file(str(output_path)) - processing_time = time.time() - 
+def _run_benchmark(engine, chunks, output_dir):  # NOTE(review): output_dir is never used — benchmark audio is written to a temp dir
+    """Benchmark different thread counts and report results."""
+    sample = chunks[:3] if len(chunks) >= 3 else chunks  # NOTE(review): plain chunks[:3] already handles short lists
+    results = []  # NOTE(review): collected but never read after the loop — drop it or use it in the report
def create_arg_parser(): # Run thread benchmark if requested if args.thread_benchmark: + _run_benchmark(engine, chunks, config.output_dir) + return # Create output directory FileManager.create_output_dir(config.output_dir) From 8d7fa6ecbdcf594f9447c1cd38257e14f7886c6d Mon Sep 17 00:00:00 2001 From: Greg Randall Date: Mon, 2 Mar 2026 13:20:22 -0500 Subject: [PATCH 5/5] chore: ignore peak files (*.pkf) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9f1f178..348b73c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ __pycache__ __pycache__/ *output/ *.wav +*.pkf *.mp3 *.flac *.opus