mirror of
https://github.com/kuhyx/testsAndMisc-archive.git
synced 2026-07-04 14:23:04 +02:00
feat: added translations
This commit is contained in:
parent
bcb17f60e0
commit
acd6466203
@ -3,10 +3,13 @@
|
|||||||
This package provides tools for:
|
This package provides tools for:
|
||||||
1. Analyzing word frequency in text (analyzer module)
|
1. Analyzing word frequency in text (analyzer module)
|
||||||
2. Finding text excerpts where target words are most prevalent (excerpt_finder module)
|
2. Finding text excerpts where target words are most prevalent (excerpt_finder module)
|
||||||
|
3. Combining analysis with excerpts for language learning (learning_pipe module)
|
||||||
|
4. Offline translation between languages (translator module)
|
||||||
|
|
||||||
Example usage:
|
Example usage:
|
||||||
from python_pkg.word_frequency.analyzer import analyze_text, analyze_and_format
|
from python_pkg.word_frequency.analyzer import analyze_text, analyze_and_format
|
||||||
from python_pkg.word_frequency.excerpt_finder import find_best_excerpt
|
from python_pkg.word_frequency.excerpt_finder import find_best_excerpt
|
||||||
|
from python_pkg.word_frequency.translator import translate_words
|
||||||
|
|
||||||
# Analyze word frequency
|
# Analyze word frequency
|
||||||
counts = analyze_text("hello world hello")
|
counts = analyze_text("hello world hello")
|
||||||
@ -18,6 +21,10 @@ Example usage:
|
|||||||
target_words=["and", "the"],
|
target_words=["and", "the"],
|
||||||
excerpt_length=3,
|
excerpt_length=3,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Translate words (requires argostranslate installed)
|
||||||
|
translations = translate_words(["hello", "world"], "en", "es")
|
||||||
|
```
|
||||||
print(results[0].excerpt) # "and she and" or similar
|
print(results[0].excerpt) # "and she and" or similar
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
@ -38,9 +38,19 @@ from typing import TYPE_CHECKING
|
|||||||
try:
|
try:
|
||||||
from python_pkg.word_frequency.analyzer import analyze_text, read_file
|
from python_pkg.word_frequency.analyzer import analyze_text, read_file
|
||||||
from python_pkg.word_frequency.excerpt_finder import find_best_excerpt
|
from python_pkg.word_frequency.excerpt_finder import find_best_excerpt
|
||||||
|
from python_pkg.word_frequency.translator import (
|
||||||
|
TranslationResult,
|
||||||
|
detect_language,
|
||||||
|
translate_words_batch,
|
||||||
|
)
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
from analyzer import analyze_text, read_file # type: ignore[import-not-found]
|
from analyzer import analyze_text, read_file # type: ignore[import-not-found]
|
||||||
from excerpt_finder import find_best_excerpt # type: ignore[import-not-found]
|
from excerpt_finder import find_best_excerpt # type: ignore[import-not-found]
|
||||||
|
from translator import ( # type: ignore[import-not-found]
|
||||||
|
TranslationResult,
|
||||||
|
detect_language,
|
||||||
|
translate_words_batch,
|
||||||
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
@ -94,6 +104,8 @@ def generate_learning_lesson(
|
|||||||
skip_numbers: bool = True,
|
skip_numbers: bool = True,
|
||||||
case_sensitive: bool = False,
|
case_sensitive: bool = False,
|
||||||
context_words: int = 5,
|
context_words: int = 5,
|
||||||
|
translate_from: str | None = None,
|
||||||
|
translate_to: str | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Generate a learning lesson from text.
|
"""Generate a learning lesson from text.
|
||||||
|
|
||||||
@ -108,6 +120,8 @@ def generate_learning_lesson(
|
|||||||
skip_numbers: If True, filter out numeric words (default: True).
|
skip_numbers: If True, filter out numeric words (default: True).
|
||||||
case_sensitive: If True, treat words case-sensitively.
|
case_sensitive: If True, treat words case-sensitively.
|
||||||
context_words: Words of context to include around excerpts.
|
context_words: Words of context to include around excerpts.
|
||||||
|
translate_from: Source language code for translation (e.g., 'la', 'pl').
|
||||||
|
translate_to: Target language code for translation (e.g., 'en').
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Formatted learning lesson as a string.
|
Formatted learning lesson as a string.
|
||||||
@ -142,6 +156,30 @@ def generate_learning_lesson(
|
|||||||
lines.append(f"After filtering {len(all_stopwords)} stopwords: {len(filtered_words):,} vocabulary words")
|
lines.append(f"After filtering {len(all_stopwords)} stopwords: {len(filtered_words):,} vocabulary words")
|
||||||
else:
|
else:
|
||||||
lines.append(f"Vocabulary words: {len(filtered_words):,}")
|
lines.append(f"Vocabulary words: {len(filtered_words):,}")
|
||||||
|
|
||||||
|
# Handle translation setup
|
||||||
|
actual_translate_from = translate_from
|
||||||
|
actual_translate_to = translate_to or "en" # Default to English
|
||||||
|
|
||||||
|
# Auto-detect language if translation is enabled but source not specified
|
||||||
|
if translate_from == "auto" or (translate_to and not translate_from):
|
||||||
|
detected = detect_language(text)
|
||||||
|
if detected:
|
||||||
|
actual_translate_from = detected
|
||||||
|
lines.append(f"Detected language: {detected}")
|
||||||
|
# Note: langdetect doesn't support Latin (often detected as Italian)
|
||||||
|
# If detection seems wrong, use --translate-from to override
|
||||||
|
else:
|
||||||
|
lines.append(
|
||||||
|
"Warning: Could not detect language "
|
||||||
|
"(install langdetect: pip install langdetect)"
|
||||||
|
)
|
||||||
|
actual_translate_from = None
|
||||||
|
|
||||||
|
do_translate = actual_translate_from is not None and actual_translate_to is not None
|
||||||
|
if do_translate:
|
||||||
|
lines.append(f"Translation: {actual_translate_from} -> {actual_translate_to}")
|
||||||
|
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
# Generate batches
|
# Generate batches
|
||||||
@ -162,13 +200,37 @@ def generate_learning_lesson(
|
|||||||
lines.append("-" * 70)
|
lines.append("-" * 70)
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
|
# Get translations if requested
|
||||||
|
translations: dict[str, str] = {}
|
||||||
|
if do_translate:
|
||||||
|
words_to_translate = [word for word, _ in batch_words]
|
||||||
|
translation_results = translate_words_batch(
|
||||||
|
words_to_translate,
|
||||||
|
actual_translate_from, # type: ignore[arg-type]
|
||||||
|
actual_translate_to, # type: ignore[arg-type]
|
||||||
|
)
|
||||||
|
translations = {
|
||||||
|
r.source_word: r.translated_word
|
||||||
|
for r in translation_results
|
||||||
|
if r.success
|
||||||
|
}
|
||||||
|
|
||||||
# Word list with frequencies
|
# Word list with frequencies
|
||||||
lines.append("VOCABULARY TO LEARN:")
|
lines.append("VOCABULARY TO LEARN:")
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
for i, (word, count) in enumerate(batch_words, start=start_idx + 1):
|
if do_translate and translations:
|
||||||
percentage = (count / total_words) * 100
|
# Include translations in output
|
||||||
lines.append(f" {i:3}. {word:<20} ({count:,} occurrences, {percentage:.2f}%)")
|
for i, (word, count) in enumerate(batch_words, start=start_idx + 1):
|
||||||
|
percentage = (count / total_words) * 100
|
||||||
|
trans = translations.get(word, "?")
|
||||||
|
lines.append(
|
||||||
|
f" {i:3}. {word:<20} -> {trans:<20} ({count:,} occurrences, {percentage:.2f}%)"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
for i, (word, count) in enumerate(batch_words, start=start_idx + 1):
|
||||||
|
percentage = (count / total_words) * 100
|
||||||
|
lines.append(f" {i:3}. {word:<20} ({count:,} occurrences, {percentage:.2f}%)")
|
||||||
|
|
||||||
lines.append("")
|
lines.append("")
|
||||||
|
|
||||||
@ -301,6 +363,27 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|||||||
help="Include numeric words in vocabulary (filtered by default)",
|
help="Include numeric words in vocabulary (filtered by default)",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Translation options (enabled by default)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-translate",
|
||||||
|
"-T",
|
||||||
|
action="store_true",
|
||||||
|
help="Disable translation",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--translate-from",
|
||||||
|
type=str,
|
||||||
|
metavar="LANG",
|
||||||
|
help="Source language code (e.g., 'la', 'pl', 'de'). If omitted, auto-detected.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--translate-to",
|
||||||
|
type=str,
|
||||||
|
metavar="LANG",
|
||||||
|
default="en",
|
||||||
|
help="Target language code (default: 'en')",
|
||||||
|
)
|
||||||
|
|
||||||
# Output options
|
# Output options
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output",
|
"--output",
|
||||||
@ -321,6 +404,15 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|||||||
# Load custom stopwords if provided
|
# Load custom stopwords if provided
|
||||||
custom_stopwords = load_stopwords(args.stopwords)
|
custom_stopwords = load_stopwords(args.stopwords)
|
||||||
|
|
||||||
|
# Determine translation settings
|
||||||
|
# Translation enabled by default, --no-translate disables it
|
||||||
|
translate_from: str | None = None
|
||||||
|
translate_to: str | None = None
|
||||||
|
|
||||||
|
if not args.no_translate:
|
||||||
|
translate_from = args.translate_from or "auto" # "auto" triggers detection
|
||||||
|
translate_to = args.translate_to
|
||||||
|
|
||||||
# Generate lesson
|
# Generate lesson
|
||||||
lesson = generate_learning_lesson(
|
lesson = generate_learning_lesson(
|
||||||
text,
|
text,
|
||||||
@ -332,6 +424,8 @@ def main(argv: Sequence[str] | None = None) -> int:
|
|||||||
skip_default_stopwords=args.no_default_stopwords,
|
skip_default_stopwords=args.no_default_stopwords,
|
||||||
skip_numbers=not args.include_numbers,
|
skip_numbers=not args.include_numbers,
|
||||||
case_sensitive=args.case_sensitive,
|
case_sensitive=args.case_sensitive,
|
||||||
|
translate_from=translate_from,
|
||||||
|
translate_to=translate_to,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Output
|
# Output
|
||||||
|
|||||||
@ -309,3 +309,90 @@ class TestDefaultStopwords:
|
|||||||
"""Test that all stopwords are lowercase."""
|
"""Test that all stopwords are lowercase."""
|
||||||
for word in DEFAULT_STOPWORDS_EN:
|
for word in DEFAULT_STOPWORDS_EN:
|
||||||
assert word == word.lower()
|
assert word == word.lower()
|
||||||
|
|
||||||
|
|
||||||
|
class TestTranslationIntegration:
    """Tests for translation integration in learning_pipe."""

    def test_lesson_without_translation(self) -> None:
        """A lesson built without translation arguments has no translation header."""
        text = "hello world hello world hello"
        result = generate_learning_lesson(
            text,
            batch_size=5,
            num_batches=1,
            skip_default_stopwords=True,
        )

        assert "hello" in result
        assert "world" in result
        # The "Translation: <from> -> <to>" header is only written when
        # translation is enabled. The previous `A or B` form of this check
        # passed whenever either substring was missing, so it could not fail
        # in practice.
        assert "Translation:" not in result

    def test_lesson_with_translation_params(self) -> None:
        """Explicit translate_from/translate_to are accepted and a lesson is produced."""
        text = "hello world hello world hello"
        # This should not crash even without argostranslate installed
        result = generate_learning_lesson(
            text,
            batch_size=5,
            num_batches=1,
            skip_default_stopwords=True,
            translate_from="en",
            translate_to="es",
        )

        # The lesson should still be generated
        assert "VOCABULARY TO LEARN:" in result
        assert "hello" in result

    def test_main_with_translate_flags(self, tmp_path: Path) -> None:
        """main() accepts --translate-from/--translate-to and exits with 0."""
        text_file = tmp_path / "test.txt"
        text_file.write_text("hello world hello world hello", encoding="utf-8")

        # Should not crash even if translation fails
        result = main([
            "--file", str(text_file),
            "--translate-from", "en",
            "--translate-to", "es",
            "--no-default-stopwords",
        ])

        assert result == 0

    def test_translate_to_defaults_to_english(self) -> None:
        """translate_to falls back to 'en' when the source language is auto-detected."""
        # The lesson's own warning text names langdetect as the package needed
        # for detection; without it this test cannot observe a detected
        # language, so skip instead of failing.
        pytest.importorskip("langdetect")

        text = "hello world"
        # translate_from="auto" requests detection; translate_to is left unset.
        result = generate_learning_lesson(
            text,
            batch_size=5,
            num_batches=1,
            skip_default_stopwords=True,
            translate_from="auto",  # Auto-detect source language
            translate_to=None,  # Should default to English
        )

        # Should have translation output with auto-detected source -> en
        assert "Detected language:" in result
        assert " -> en" in result

    def test_no_translation_when_both_none(self) -> None:
        """No translation or detection output when both language arguments are None."""
        text = "hello world"
        result = generate_learning_lesson(
            text,
            batch_size=5,
            num_batches=1,
            skip_default_stopwords=True,
            translate_from=None,
            translate_to=None,
        )

        # Should not have translation output
        assert "Translation:" not in result
        assert "Detected language:" not in result
|
||||||
|
|
||||||
|
|||||||
619
python_pkg/word_frequency/tests/test_translator.py
Normal file
619
python_pkg/word_frequency/tests/test_translator.py
Normal file
@ -0,0 +1,619 @@
|
|||||||
|
"""Tests for the offline translator module."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Generator
|
||||||
|
|
||||||
|
# Import the module
|
||||||
|
try:
|
||||||
|
from python_pkg.word_frequency import translator
|
||||||
|
from python_pkg.word_frequency.translator import (
|
||||||
|
TranslationResult,
|
||||||
|
download_languages,
|
||||||
|
format_translations,
|
||||||
|
get_available_packages,
|
||||||
|
get_installed_languages,
|
||||||
|
main,
|
||||||
|
read_file,
|
||||||
|
translate_word,
|
||||||
|
translate_words,
|
||||||
|
translate_words_batch,
|
||||||
|
)
|
||||||
|
except ImportError:
|
||||||
|
# Direct execution support
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
|
||||||
|
from python_pkg.word_frequency import translator
|
||||||
|
from python_pkg.word_frequency.translator import (
|
||||||
|
TranslationResult,
|
||||||
|
download_languages,
|
||||||
|
format_translations,
|
||||||
|
get_available_packages,
|
||||||
|
get_installed_languages,
|
||||||
|
main,
|
||||||
|
read_file,
|
||||||
|
translate_word,
|
||||||
|
translate_words,
|
||||||
|
translate_words_batch,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Helper context manager for mocking argostranslate
|
||||||
|
class ArgosAvailableMock:
    """Context manager to mock argostranslate being available.

    While active, ``sys.modules`` holds MagicMock stand-ins for
    ``argostranslate``, ``argostranslate.translate`` and
    ``argostranslate.package``, and ``translator._argos_available`` is True.
    Entering returns the mock for ``argostranslate.translate``.
    """

    def __init__(self, translate_returns: str | list[str] | Exception | None = None) -> None:
        """Initialize with return values for translate().

        Args:
            translate_returns: A string is used as the return value of
                ``translate()``; a list or an exception instance is installed
                as the mock's ``side_effect``; None leaves the mock unconfigured.
        """
        self.translate_returns = translate_returns
        self.mock_translate_module = MagicMock()
        self.mock_package_module = MagicMock()
        self.mock_parent = MagicMock()
        self.original_available = translator._argos_available
        # Filled in by __enter__; kept here so __exit__ is safe to call even
        # if __enter__ never ran.
        self.patchers = []

    def __enter__(self) -> MagicMock:
        """Set up the mocks."""
        # Snapshot the flag at entry rather than at construction, so that
        # __exit__ restores whatever was in effect right before the block.
        self.original_available = translator._argos_available

        # Set up translate return value. Lists and exception instances both
        # go through side_effect; plain strings are a fixed return value.
        if isinstance(self.translate_returns, (Exception, list)):
            self.mock_translate_module.translate.side_effect = self.translate_returns
        elif self.translate_returns is not None:
            self.mock_translate_module.translate.return_value = self.translate_returns

        # Link parent module to submodules (critical for Python imports)
        self.mock_parent.translate = self.mock_translate_module
        self.mock_parent.package = self.mock_package_module

        # Patch sys.modules
        self.patchers = [
            patch.dict(
                "sys.modules",
                {
                    "argostranslate": self.mock_parent,
                    "argostranslate.translate": self.mock_translate_module,
                    "argostranslate.package": self.mock_package_module,
                },
            ),
        ]
        for p in self.patchers:
            p.start()

        # Flip the flag only after patching succeeded, so a failure above
        # leaves the module-level state untouched.
        translator._argos_available = True
        return self.mock_translate_module

    def __exit__(self, *args: object) -> None:
        """Restore original state."""
        try:
            for p in self.patchers:
                p.stop()
        finally:
            # Always restore the flag, even if stopping a patcher raised.
            translator._argos_available = self.original_available
|
||||||
|
|
||||||
|
|
||||||
|
# Fixtures
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_argos_unavailable() -> Generator[None, None, None]:
    """Force ``translator._argos_available`` to False for the duration of a test.

    The previous value is put back once the test has finished.
    """
    with patch.object(translator, "_argos_available", False):
        yield
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_all_translators_unavailable() -> Generator[None, None, None]:
    """Force both translator backend flags to False for the duration of a test.

    Covers ``translator._argos_available`` and
    ``translator._deep_translator_available``; both previous values are put
    back once the test has finished.
    """
    argos_off = patch.object(translator, "_argos_available", False)
    deep_off = patch.object(translator, "_deep_translator_available", False)
    with argos_off, deep_off:
        yield
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def temp_words_file(tmp_path: Path) -> Path:
    """Write a three-word, newline-separated file under tmp_path and return its path."""
    target = tmp_path / "words.txt"
    contents = "hello\nworld\ngoodbye\n"
    target.write_text(contents, encoding="utf-8")
    return target
|
||||||
|
|
||||||
|
|
||||||
|
# TranslationResult tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestTranslationResult:
    """Checks on the TranslationResult record type."""

    def test_successful_result(self) -> None:
        """Keyword-constructed fields read back unchanged; error is None when omitted."""
        fields = {
            "source_word": "hello",
            "translated_word": "hola",
            "source_lang": "en",
            "target_lang": "es",
            "success": True,
        }
        ok = TranslationResult(**fields)

        assert ok.source_word == "hello"
        assert ok.translated_word == "hola"
        assert ok.source_lang == "en"
        assert ok.target_lang == "es"
        assert ok.success is True
        assert ok.error is None

    def test_failed_result(self) -> None:
        """A failed result keeps its success flag and error message."""
        failed = TranslationResult(
            source_word="xyz",
            translated_word="",
            source_lang="en",
            target_lang="xx",
            success=False,
            error="Language not supported",
        )

        assert failed.success is False
        assert failed.error == "Language not supported"

    def test_result_is_tuple(self) -> None:
        """A positionally built result is a tuple with six elements."""
        record = TranslationResult("a", "b", "en", "es", True)

        assert isinstance(record, tuple)
        assert len(record) == 6
|
||||||
|
|
||||||
|
|
||||||
|
# translate_word tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestTranslateWord:
    """Tests for translate_word function."""

    def test_translate_word_all_backends_unavailable(
        self, mock_all_translators_unavailable: None
    ) -> None:
        """With both backend flags off, the result is a failure naming the cause."""
        result = translate_word("hello", "en", "es")
        assert result.success is False
        assert "No translation backend" in str(result.error)

    def test_translate_word_argos_unavailable_uses_deep_translator(
        self, mock_argos_unavailable: None
    ) -> None:
        """Test that deep-translator is used when argos is unavailable."""
        # deep-translator should work as fallback (it's installed)
        result = translate_word("hello", "en", "es")
        # This may succeed if deep-translator is installed
        # Just verify we get a result without crashing
        assert isinstance(result, TranslationResult)

    def test_translate_word_success(self) -> None:
        """A mocked argos translation is returned as a successful result."""
        with ArgosAvailableMock("hola"):
            result = translate_word("hello", "en", "es")

        assert result.source_word == "hello"
        assert result.translated_word == "hola"
        assert result.success is True

    def test_translate_word_argos_exception_falls_back(self) -> None:
        """Test that an exception raised by argos does not escape translate_word."""
        # Previously this test marked argos as unavailable, which duplicated
        # the "argos unavailable" test above and never raised from argos.
        # Make the mocked argos translate() raise so the error path is the
        # one actually exercised.
        # NOTE(review): assumes translate_word catches backend exceptions,
        # as the test name states; confirm against the translator module.
        with ArgosAvailableMock(RuntimeError("argos failed")):
            result = translate_word("hello", "en", "es")

        # Just verify it doesn't crash - may succeed or fail depending on
        # which fallback backend is usable in the environment.
        assert isinstance(result, TranslationResult)
|
||||||
|
|
||||||
|
|
||||||
|
# translate_words tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestTranslateWords:
    """Checks for translate_words."""

    def test_translate_empty_list(self) -> None:
        """An empty word list yields an empty result list."""
        assert translate_words([], "en", "es") == []

    def test_translate_multiple_words(self) -> None:
        """Each word receives the next mocked translation, in order."""
        expected = ["hola", "mundo"]
        with ArgosAvailableMock(["hola", "mundo"]):
            translated = translate_words(["hello", "world"], "en", "es")

        assert len(translated) == 2
        first, second = translated[0], translated[1]
        assert first.translated_word == expected[0]
        assert second.translated_word == expected[1]
|
||||||
|
|
||||||
|
|
||||||
|
# translate_words_batch tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestTranslateWordsBatch:
    """Tests for translate_words_batch function."""

    def test_batch_empty_list(self) -> None:
        """Test batch translation of empty list."""
        results = translate_words_batch([], "en", "es")
        assert results == []

    def test_batch_small_list(self) -> None:
        """Test batch translation of small list (3 or fewer)."""
        with ArgosAvailableMock(["uno", "dos", "tres"]) as mock:
            results = translate_words_batch(["one", "two", "three"], "en", "es")

        assert len(results) == 3
        # Small lists use individual translation
        assert mock.translate.call_count == 3

    def test_batch_large_list_success(self) -> None:
        """Test batch translation of large list."""
        words = ["one", "two", "three", "four", "five"]

        with ArgosAvailableMock("uno\ndos\ntres\ncuatro\ncinco") as mock:
            results = translate_words_batch(words, "en", "es")

        assert len(results) == 5
        # Batch translation called once
        mock.translate.assert_called_once()
        assert results[0].translated_word == "uno"
        assert results[4].translated_word == "cinco"

    def test_batch_fallback_on_mismatch(self) -> None:
        """Test batch translation falls back when result count mismatches."""
        words = ["one", "two", "three", "four"]
        # First call (batch) returns wrong count, subsequent calls are individual
        with ArgosAvailableMock(
            ["wrong\ncount", "uno", "dos", "tres", "cuatro"]
        ) as mock:
            results = translate_words_batch(words, "en", "es")

        assert len(results) == 4
        # Fallback to individual
        assert mock.translate.call_count == 5

    def test_batch_fallback_on_exception(self) -> None:
        """Test batch translation falls back on exception."""
        words = ["one", "two", "three", "four"]

        # Create mock that raises first then succeeds
        mock_translate_module = MagicMock()
        mock_translate_module.translate.side_effect = [
            RuntimeError("Batch failed"),
            "uno",
            "dos",
            "tres",
            "cuatro",
        ]
        mock_package_module = MagicMock()
        mock_parent = MagicMock()
        mock_parent.translate = mock_translate_module
        mock_parent.package = mock_package_module

        fake_modules = {
            "argostranslate": mock_parent,
            "argostranslate.translate": mock_translate_module,
            "argostranslate.package": mock_package_module,
        }
        # patch.object restores translator._argos_available on exit even if
        # the call under test raises, so a failure here cannot leak the
        # flag into later tests.
        with patch.object(translator, "_argos_available", True), patch.dict(
            "sys.modules", fake_modules
        ):
            results = translate_words_batch(words, "en", "es")

        assert len(results) == 4
|
||||||
|
|
||||||
|
|
||||||
|
# format_translations tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatTranslations:
    """Checks for format_translations."""

    def test_format_empty(self) -> None:
        """An empty result list formats to the fixed placeholder text."""
        assert format_translations([]) == "No translations."

    def test_format_single_translation(self) -> None:
        """One result shows the language pair, the source word and the translation."""
        rendered = format_translations(
            [TranslationResult("hello", "hola", "en", "es", True)]
        )

        for fragment in ("en -> es", "hello", "hola"):
            assert fragment in rendered

    def test_format_multiple_translations(self) -> None:
        """Every source word and translation appears in the output."""
        pairs = [
            TranslationResult("hello", "hola", "en", "es", True),
            TranslationResult("world", "mundo", "en", "es", True),
        ]
        rendered = format_translations(pairs)

        for fragment in ("hello", "hola", "world", "mundo"):
            assert fragment in rendered

    def test_format_with_errors(self) -> None:
        """With show_errors=True a failed result's message is printed."""
        mixed = [
            TranslationResult("hello", "hola", "en", "es", True),
            TranslationResult("xyz", "", "en", "es", False, "Unknown word"),
        ]
        rendered = format_translations(mixed, show_errors=True)

        assert "hello" in rendered
        assert "Error: Unknown word" in rendered

    def test_format_hide_errors(self) -> None:
        """With show_errors=False a failed result's message is left out."""
        mixed = [
            TranslationResult("hello", "hola", "en", "es", True),
            TranslationResult("xyz", "", "en", "es", False, "Unknown word"),
        ]
        rendered = format_translations(mixed, show_errors=False)

        assert "hello" in rendered
        assert "Unknown word" not in rendered
|
||||||
|
|
||||||
|
|
||||||
|
# get_installed_languages tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetInstalledLanguages:
    """Checks for get_installed_languages."""

    def test_argos_unavailable(self, mock_argos_unavailable: None) -> None:
        """With the argos flag off the function returns an empty list."""
        assert get_installed_languages() == []

    def test_returns_languages(self) -> None:
        """Mocked installed languages come back as (code, name) pairs."""
        fake_languages = []
        for code, label in (("en", "English"), ("es", "Spanish")):
            language = MagicMock()
            language.code = code
            language.name = label
            fake_languages.append(language)

        with ArgosAvailableMock() as argos:
            argos.get_installed_languages.return_value = fake_languages
            installed = get_installed_languages()

        assert ("en", "English") in installed
        assert ("es", "Spanish") in installed
|
||||||
|
|
||||||
|
|
||||||
|
# get_available_packages tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetAvailablePackages:
    """Checks for get_available_packages."""

    def test_argos_unavailable(self, mock_argos_unavailable: None) -> None:
        """With the argos flag off the function returns an empty list."""
        packages = get_available_packages()
        assert packages == []
|
||||||
|
|
||||||
|
|
||||||
|
# download_languages tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestDownloadLanguages:
    """Checks for download_languages."""

    def test_argos_unavailable(self, mock_argos_unavailable: None) -> None:
        """With the argos flag off the function returns an empty dict."""
        requested = ["en", "es"]
        outcome = download_languages(requested)
        assert outcome == {}
|
||||||
|
|
||||||
|
|
||||||
|
# read_file tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestReadFile:
    """Checks for read_file."""

    def test_read_file(self, tmp_path: Path) -> None:
        """Text written to a file is read back unchanged."""
        payload = "hello\nworld"
        source = tmp_path / "test.txt"
        source.write_text(payload, encoding="utf-8")

        assert read_file(source) == "hello\nworld"

    def test_read_file_not_found(self, tmp_path: Path) -> None:
        """A missing path raises FileNotFoundError."""
        missing = tmp_path / "nonexistent.txt"
        with pytest.raises(FileNotFoundError):
            read_file(missing)
|
||||||
|
|
||||||
|
|
||||||
|
# main function tests
|
||||||
|
|
||||||
|
|
||||||
|
class TestMain:
    """Tests for the ``main`` command-line entry point."""

    def test_argos_unavailable_error(self, mock_argos_unavailable: None) -> None:
        """A translation request exits with 1 when argos is not installed."""
        exit_code = main(["--text", "hello", "--from", "en", "--to", "es"])
        assert exit_code == 1

    def test_list_languages_empty(self, capsys: pytest.CaptureFixture[str]) -> None:
        """--list-languages says so when no language is installed."""
        with ArgosAvailableMock() as argos:
            argos.get_installed_languages.return_value = []
            exit_code = main(["--list-languages"])

        assert exit_code == 0
        stdout = capsys.readouterr().out
        assert "No languages installed" in stdout

    def test_list_languages_with_results(
        self, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """--list-languages prints the code and name of installed languages."""
        # Attributes are assigned after construction on purpose:
        # MagicMock(name=...) would name the mock instead of setting .name.
        english = MagicMock()
        english.code = "en"
        english.name = "English"

        with ArgosAvailableMock() as argos:
            argos.get_installed_languages.return_value = [english]
            exit_code = main(["--list-languages"])

        assert exit_code == 0
        stdout = capsys.readouterr().out
        assert "en" in stdout
        assert "English" in stdout

    def test_translate_single_text(self, capsys: pytest.CaptureFixture[str]) -> None:
        """--text prints both the source text and its translation."""
        with ArgosAvailableMock("hola"):
            exit_code = main(["--text", "hello", "--from", "en", "--to", "es"])

        assert exit_code == 0
        stdout = capsys.readouterr().out
        assert "hello" in stdout
        assert "hola" in stdout

    def test_translate_multiple_words(
        self, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """--words prints every source word."""
        argv = ["--words", "hello", "world", "--from", "en", "--to", "es"]
        with ArgosAvailableMock(["hola", "mundo"]):
            exit_code = main(argv)

        assert exit_code == 0
        stdout = capsys.readouterr().out
        assert "hello" in stdout
        assert "world" in stdout

    def test_translate_from_file(
        self,
        temp_words_file: Path,
        capsys: pytest.CaptureFixture[str],
    ) -> None:
        """--words-file translates the words listed in the file."""
        argv = ["--words-file", str(temp_words_file), "--from", "en", "--to", "es"]
        with ArgosAvailableMock(["hola", "mundo", "adios"]):
            exit_code = main(argv)

        assert exit_code == 0
        stdout = capsys.readouterr().out
        assert "hello" in stdout
        assert "world" in stdout
        assert "goodbye" in stdout

    def test_translate_file_not_found(
        self, capsys: pytest.CaptureFixture[str]
    ) -> None:
        """A missing words file yields exit code 1 and a message on stderr."""
        argv = ["--words-file", "/nonexistent/file.txt", "--from", "en", "--to", "es"]
        with ArgosAvailableMock():
            exit_code = main(argv)

        assert exit_code == 1
        stderr = capsys.readouterr().err
        assert "File not found" in stderr

    def test_translate_output_to_file(
        self,
        tmp_path: Path,
        capsys: pytest.CaptureFixture[str],
    ) -> None:
        """--output writes the formatted translations to the given path."""
        output_file = tmp_path / "output.txt"
        argv = [
            "--text", "hello",
            "--from", "en",
            "--to", "es",
            "--output", str(output_file),
        ]

        with ArgosAvailableMock("hola"):
            exit_code = main(argv)

        assert exit_code == 0
        assert output_file.exists()
        written = output_file.read_text(encoding="utf-8")
        assert "hello" in written
        assert "hola" in written

    def test_no_input_shows_help(self, capsys: pytest.CaptureFixture[str]) -> None:
        """Running without any input option exits with 1."""
        with ArgosAvailableMock():
            exit_code = main([])

        assert exit_code == 1

    def test_translation_failure_returns_error(
        self, mock_all_translators_unavailable: None
    ) -> None:
        """Exit code is 1 when no translation backend is available."""
        exit_code = main(["--text", "hello", "--from", "en", "--to", "es"])
        assert exit_code == 1
|
||||||
|
|
||||||
|
|
||||||
|
# Integration-style tests (still mocked but testing more flow)
|
||||||
|
|
||||||
|
|
||||||
|
class TestIntegration:
    """Integration-style tests chaining translation and formatting."""

    def test_full_translation_flow(self) -> None:
        """Translate a list of words and format the outcome."""
        source_words = ["one", "two", "three"]
        with ArgosAvailableMock(["uno", "dos", "tres"]):
            results = translate_words(source_words, "en", "es")

        assert all(item.success for item in results)
        assert [item.translated_word for item in results] == ["uno", "dos", "tres"]

        table = format_translations(results)
        assert "en -> es" in table
        assert "one" in table
        assert "uno" in table

    def test_mixed_success_failure(
        self, mock_all_translators_unavailable: None
    ) -> None:
        """Every word fails, and is reported, when no backend is available."""
        results = translate_words(["hello", "xyz", "world"], "en", "es")

        # With no backend at all, not a single word can succeed.
        assert all(not item.success for item in results)

        table = format_translations(results)
        assert "Error" in table
|
||||||
572
python_pkg/word_frequency/translator.py
Normal file
572
python_pkg/word_frequency/translator.py
Normal file
@ -0,0 +1,572 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Translator - translates words/text between languages.
|
||||||
|
|
||||||
|
This module provides translation capabilities using either:
|
||||||
|
1. Argos Translate (offline, requires large downloads) - preferred if installed
|
||||||
|
2. deep-translator (online, uses Google Translate) - lightweight fallback
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Translate a single word
|
||||||
|
python -m python_pkg.word_frequency.translator --text "hello" --from en --to es
|
||||||
|
|
||||||
|
# Translate multiple words
|
||||||
|
python -m python_pkg.word_frequency.translator --words hello world goodbye --from en --to pl
|
||||||
|
|
||||||
|
# Translate words from a file (one word per line)
|
||||||
|
python -m python_pkg.word_frequency.translator --words-file words.txt --from la --to en
|
||||||
|
|
||||||
|
# List available languages
|
||||||
|
python -m python_pkg.word_frequency.translator --list-languages
|
||||||
|
|
||||||
|
# Output to file
|
||||||
|
python -m python_pkg.word_frequency.translator --words-file vocab.txt --from pl --to en --output translations.txt
|
||||||
|
|
||||||
|
Dependencies (install one):
|
||||||
|
pip install deep-translator # Lightweight, uses Google Translate (online)
|
||||||
|
pip install argostranslate # Offline translation (requires ~3GB downloads)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING, NamedTuple
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
# Lazy imports for translation backends (may not be installed).
# Each flag caches the result of the matching _check_* helper:
# None = not probed yet, True/False = outcome of the import attempt.
_argos_available: bool | None = None
_deep_translator_available: bool | None = None
_langdetect_available: bool | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _check_argos() -> bool:
    """Report whether argostranslate can be imported.

    The import is attempted on the first call only; the outcome is stored in
    the module-level ``_argos_available`` flag and reused afterwards.

    Returns:
        True if both ``argostranslate.package`` and
        ``argostranslate.translate`` imported, False otherwise.
    """
    global _argos_available
    if _argos_available is not None:
        return _argos_available

    try:
        import argostranslate.package  # noqa: F401
        import argostranslate.translate  # noqa: F401
    except ImportError:
        _argos_available = False
    else:
        _argos_available = True
    return _argos_available
|
||||||
|
|
||||||
|
|
||||||
|
def _check_deep_translator() -> bool:
    """Report whether deep-translator can be imported.

    The import is attempted on the first call only; the outcome is stored in
    the module-level ``_deep_translator_available`` flag and reused afterwards.

    Returns:
        True if ``GoogleTranslator`` could be imported from
        ``deep_translator``, False otherwise.
    """
    global _deep_translator_available
    if _deep_translator_available is not None:
        return _deep_translator_available

    try:
        from deep_translator import GoogleTranslator  # noqa: F401
    except ImportError:
        _deep_translator_available = False
    else:
        _deep_translator_available = True
    return _deep_translator_available
|
||||||
|
|
||||||
|
|
||||||
|
def _check_langdetect() -> bool:
    """Report whether langdetect can be imported.

    The import is attempted on the first call only; the outcome is stored in
    the module-level ``_langdetect_available`` flag and reused afterwards.

    Returns:
        True if ``langdetect`` imported, False otherwise.
    """
    global _langdetect_available
    if _langdetect_available is not None:
        return _langdetect_available

    try:
        import langdetect  # noqa: F401
    except ImportError:
        _langdetect_available = False
    else:
        _langdetect_available = True
    return _langdetect_available
|
||||||
|
|
||||||
|
|
||||||
|
def detect_language(text: str) -> str | None:
    """Guess the language of a text with the optional langdetect package.

    Only the first 5000 characters are handed to the detector.

    Args:
        text: The text to analyze.

    Returns:
        ISO 639-1 language code (e.g., 'en', 'la', 'pl'), or None when
        langdetect is not installed or raises ``LangDetectException``.
    """
    if not _check_langdetect():
        return None

    import langdetect

    # A leading sample is used for detection (faster and more reliable);
    # slicing copes with shorter input, so no length check is needed.
    head = text[:5000]
    try:
        detected = langdetect.detect(head)
    except langdetect.LangDetectException:  # type: ignore[attr-defined]
        return None
    return detected  # type: ignore[no-any-return]
|
||||||
|
|
||||||
|
|
||||||
|
class TranslationResult(NamedTuple):
    """Outcome of translating one word or piece of text.

    Attributes:
        source_word: The text that was submitted for translation.
        translated_word: The translated text; the translate functions in
            this module store an empty string here on failure.
        source_lang: Language code the text was translated from.
        target_lang: Language code the text was translated to.
        success: True when a backend returned a translation.
        error: Description of the failure, or None (the default).
    """

    source_word: str
    translated_word: str
    source_lang: str
    target_lang: str
    success: bool
    error: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_installed_languages() -> list[tuple[str, str]]:
    """List the languages argostranslate reports as installed.

    Returns:
        List of (code, name) tuples, one per installed language; an empty
        list when argostranslate is not installed.
    """
    if not _check_argos():
        return []

    import argostranslate.translate

    installed: list[tuple[str, str]] = []
    for language in argostranslate.translate.get_installed_languages():
        installed.append((language.code, language.name))
    return installed
|
||||||
|
|
||||||
|
|
||||||
|
def get_available_packages() -> list[tuple[str, str, str, str]]:
    """List the language packages that can be downloaded.

    The package index is refreshed before it is queried.

    Returns:
        List of (from_code, from_name, to_code, to_name) tuples; an empty
        list when argostranslate is not installed.
    """
    if not _check_argos():
        return []

    import argostranslate.package

    argostranslate.package.update_package_index()

    rows: list[tuple[str, str, str, str]] = []
    for entry in argostranslate.package.get_available_packages():
        rows.append((entry.from_code, entry.from_name, entry.to_code, entry.to_name))
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def download_languages(lang_codes: Sequence[str]) -> dict[str, bool]:
    """Download and install packages for every ordered pair of languages.

    Each ordered pair ``(a, b)`` with ``a != b`` drawn from *lang_codes* is
    looked up in the Argos package index and installed when present. No
    language is added implicitly: pairs involving English are attempted only
    if ``'en'`` is among the requested codes.

    Progress messages are printed to stdout.

    Args:
        lang_codes: Language codes to download (e.g., ['en', 'es', 'pl']).
            Duplicates are ignored.

    Returns:
        Dict mapping "from->to" to a success boolean. A pair maps to False
        when the index has no such package or when downloading/installing
        it raised. The dict is empty when argostranslate is not installed.
    """
    if not _check_argos():
        return {}

    import argostranslate.package

    print("Updating package index...")  # noqa: T201
    argostranslate.package.update_package_index()

    # Index the published packages by (from, to) for direct lookup below.
    available_lookup = {
        (pkg.from_code, pkg.to_code): pkg
        for pkg in argostranslate.package.get_available_packages()
    }

    # dict.fromkeys removes duplicates while keeping the caller's order, so
    # the download sequence and the key order of the result are the same on
    # every run (iterating a set of strings is not).
    unique_codes = list(dict.fromkeys(lang_codes))

    results: dict[str, bool] = {}
    for from_code in unique_codes:
        for to_code in unique_codes:
            if from_code == to_code:
                continue

            key = f"{from_code}->{to_code}"
            package = available_lookup.get((from_code, to_code))
            if package is None:
                # Package not available
                results[key] = False
                continue

            try:
                print(f"Downloading {from_code} -> {to_code}...")  # noqa: T201
                argostranslate.package.install_from_path(package.download())
                results[key] = True
                print(f"  ✓ Installed {from_code} -> {to_code}")  # noqa: T201
            except Exception as e:  # noqa: BLE001
                # Best effort: one failing pair must not abort the others.
                results[key] = False
                print(f"  ✗ Failed {from_code} -> {to_code}: {e}")  # noqa: T201

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def translate_word(
    word: str,
    from_lang: str,
    to_lang: str,
) -> TranslationResult:
    """Translate a single word.

    argostranslate (offline) is tried first when it is installed. If it is
    missing or raises, deep-translator (Google Translate, online) is tried
    when that is installed.

    Args:
        word: The word to translate.
        from_lang: Source language code (e.g., 'en', 'pl', 'la').
        to_lang: Target language code.

    Returns:
        TranslationResult with the translation. On failure ``success`` is
        False, ``translated_word`` is empty and ``error`` carries the
        deep-translator error, the argostranslate error, or a hint that no
        backend is installed.
    """

    def _outcome(
        text: str, *, ok: bool, error: str | None = None
    ) -> TranslationResult:
        # Every result shares the same word and language pair.
        return TranslationResult(
            source_word=word,
            translated_word=text,
            source_lang=from_lang,
            target_lang=to_lang,
            success=ok,
            error=error,
        )

    argos_error: str | None = None

    # Preferred backend: argostranslate (offline).
    if _check_argos():
        import argostranslate.translate

        try:
            return _outcome(
                argostranslate.translate.translate(word, from_lang, to_lang),
                ok=True,
            )
        except Exception as exc:  # noqa: BLE001
            # Remember the failure and let deep-translator have a go.
            argos_error = str(exc)

    # Fallback backend: deep-translator (online via Google Translate).
    if _check_deep_translator():
        from deep_translator import GoogleTranslator

        try:
            online = GoogleTranslator(source=from_lang, target=to_lang).translate(word)
        except Exception as exc:  # noqa: BLE001
            return _outcome("", ok=False, error=str(exc))
        return _outcome(online or "", ok=True)

    # Reached only when no backend produced a result.
    if argos_error:
        message = f"argostranslate error: {argos_error}"
    else:
        message = "No translation backend available. Install: pip install deep-translator"
    return _outcome("", ok=False, error=message)
|
||||||
|
|
||||||
|
|
||||||
|
def translate_words(
    words: Sequence[str],
    from_lang: str,
    to_lang: str,
) -> list[TranslationResult]:
    """Translate several words one at a time.

    Args:
        words: List of words to translate.
        from_lang: Source language code.
        to_lang: Target language code.

    Returns:
        One TranslationResult per word, in input order.
    """
    translations: list[TranslationResult] = []
    for item in words:
        translations.append(translate_word(item, from_lang, to_lang))
    return translations
|
||||||
|
|
||||||
|
|
||||||
|
def translate_words_batch(
    words: Sequence[str],
    from_lang: str,
    to_lang: str,
) -> list[TranslationResult]:
    """Translate several words, batching them into one request when possible.

    With more than three words and argostranslate installed, the words are
    joined with newlines, translated in a single call and split again. If
    that call raises, or the number of lines coming back differs from the
    number of words, the words are translated individually instead.

    Args:
        words: List of words to translate.
        from_lang: Source language code.
        to_lang: Target language code.

    Returns:
        List of TranslationResult for each word.
    """
    if not words:
        return []

    # Small inputs, or no argostranslate: go word by word.
    if len(words) <= 3 or not _check_argos():
        return translate_words(words, from_lang, to_lang)

    import argostranslate.translate

    try:
        joined = "\n".join(words)
        pieces = argostranslate.translate.translate(joined, from_lang, to_lang).split(
            "\n"
        )

        # Only trust the batch when it lines up one-to-one with the input.
        if len(pieces) == len(words):
            return [
                TranslationResult(
                    source_word=original,
                    translated_word=piece.strip(),
                    source_lang=from_lang,
                    target_lang=to_lang,
                    success=True,
                )
                for original, piece in zip(words, pieces, strict=True)
            ]
    except Exception:  # noqa: BLE001, S110
        # Deliberately ignored: the per-word path below is the fallback.
        pass

    return translate_words(words, from_lang, to_lang)
|
||||||
|
|
||||||
|
|
||||||
|
def format_translations(
    results: list[TranslationResult],
    *,
    show_errors: bool = True,
) -> str:
    """Render translation results as a two-column text table.

    The title line takes its language pair from the first result. Column
    widths fit the longest source word and the longest successful
    translation, and are never narrower than the column headings.

    Args:
        results: List of TranslationResult to format.
        show_errors: If True, failed translations are listed with their
            error message; if False, they are left out.

    Returns:
        The table as a newline-joined string, or "No translations." when
        *results* is empty.
    """
    if not results:
        return "No translations."

    # 6 and 11 are the lengths of the "Source" and "Translation" headings.
    source_width = max(6, *(len(entry.source_word) for entry in results))
    trans_width = max(
        [11] + [len(entry.translated_word) for entry in results if entry.success]
    )

    first = results[0]
    table = [
        f"Translation: {first.source_lang} -> {first.target_lang}",
        "",
        "Source".ljust(source_width) + "  " + "Translation".ljust(trans_width),
        "-" * (source_width + trans_width + 2),
    ]

    for entry in results:
        source_cell = entry.source_word.ljust(source_width)
        if entry.success:
            table.append(source_cell + "  " + entry.translated_word.ljust(trans_width))
            continue
        if not show_errors:
            continue
        if entry.error:
            note = f"[Error: {entry.error}]"
        else:
            note = "[Failed]"
        table.append(source_cell + "  " + note)

    return "\n".join(table)
|
||||||
|
|
||||||
|
|
||||||
|
def read_file(filepath: str | Path) -> str:
    """Return the whole content of a file decoded as UTF-8."""
    with Path(filepath).open(encoding="utf-8") as handle:
        return handle.read()
|
||||||
|
|
||||||
|
|
||||||
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the translator command line."""
    parser = argparse.ArgumentParser(
        description="Offline translator using Argos Translate.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    # Actions (exclusive with each other, not with the input options)
    action_group = parser.add_mutually_exclusive_group()
    action_group.add_argument(
        "--list-languages",
        "-l",
        action="store_true",
        help="List installed languages",
    )
    action_group.add_argument(
        "--list-available",
        "-L",
        action="store_true",
        help="List available language packages for download",
    )
    action_group.add_argument(
        "--download",
        "-d",
        nargs="+",
        metavar="LANG",
        help="Download language packs (e.g., --download en es pl)",
    )

    # Input (at most one source of words)
    input_group = parser.add_mutually_exclusive_group()
    input_group.add_argument(
        "--text",
        "-t",
        type=str,
        help="Single text/word to translate",
    )
    input_group.add_argument(
        "--words",
        "-w",
        nargs="+",
        help="Words to translate",
    )
    input_group.add_argument(
        "--words-file",
        "-W",
        type=str,
        help="File with words to translate (one per line)",
    )

    # Language options
    parser.add_argument(
        "--from",
        "-f",
        dest="from_lang",
        type=str,
        default="en",
        help="Source language code (default: en)",
    )
    parser.add_argument(
        "--to",
        "-T",
        dest="to_lang",
        type=str,
        default="en",
        help="Target language code (default: en)",
    )

    # Output
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        help="Output file path",
    )
    return parser


def main(argv: Sequence[str] | None = None) -> int:
    """Main entry point for the translator.

    The first matching mode runs: ``--list-languages``, ``--list-available``,
    ``--download``, then translation of ``--text`` / ``--words`` /
    ``--words-file``. Every mode requires argostranslate to be importable;
    if it is not, an error is printed to stderr and 1 is returned.

    Args:
        argv: Command line arguments; None lets argparse read ``sys.argv``.

    Returns:
        Exit code: 0 on success; 1 when argostranslate is missing, no
        language pair was downloaded, the words file cannot be read, the
        output file cannot be written, no words were given (help is
        printed), or any translation failed.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Check if argostranslate is available
    if not _check_argos():
        print(  # noqa: T201
            "Error: argostranslate is not installed.\n"
            "Install it with: pip install argostranslate",
            file=sys.stderr,
        )
        return 1

    # Handle list-languages
    if args.list_languages:
        langs = get_installed_languages()
        if not langs:
            print("No languages installed.")  # noqa: T201
            print("Download some with: --download en es pl de fr")  # noqa: T201
        else:
            print("Installed languages:")  # noqa: T201
            for code, name in sorted(langs):
                print(f"  {code}: {name}")  # noqa: T201
        return 0

    # Handle list-available
    if args.list_available:
        packages = get_available_packages()
        if not packages:
            print("No packages available (check internet connection).")  # noqa: T201
        else:
            print("Available language packages:")  # noqa: T201
            for from_code, from_name, to_code, to_name in sorted(packages):
                print(f"  {from_code} ({from_name}) -> {to_code} ({to_name})")  # noqa: T201
        return 0

    # Handle download
    if args.download:
        downloads = download_languages(args.download)
        success_count = sum(1 for installed in downloads.values() if installed)
        print(f"\nDownloaded {success_count}/{len(downloads)} language pairs.")  # noqa: T201
        return 0 if success_count > 0 else 1

    # Handle translation
    words: list[str] = []
    if args.text:
        words = [args.text]
    elif args.words:
        words = args.words
    elif args.words_file:
        try:
            content = read_file(args.words_file)
        except FileNotFoundError:
            print(f"Error: File not found: {args.words_file}", file=sys.stderr)  # noqa: T201
            return 1
        except (OSError, UnicodeDecodeError) as exc:
            # A directory, a permission problem or non-UTF-8 bytes: report
            # it like a missing file instead of ending with a traceback.
            print(f"Error: Cannot read {args.words_file}: {exc}", file=sys.stderr)  # noqa: T201
            return 1
        words = [w.strip() for w in content.splitlines() if w.strip()]

    if not words:
        parser.print_help()
        return 1

    # Translate
    results = translate_words_batch(words, args.from_lang, args.to_lang)
    output = format_translations(results)

    # Output
    if args.output:
        try:
            Path(args.output).write_text(output, encoding="utf-8")
        except OSError as exc:
            print(f"Error: Cannot write {args.output}: {exc}", file=sys.stderr)  # noqa: T201
            return 1
        print(f"Translations written to {args.output}")  # noqa: T201
    else:
        print(output)  # noqa: T201

    # Return error if any translation failed
    return 1 if any(not r.success for r in results) else 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Loading…
Reference in New Issue
Block a user