From d2b6f0018535dc5befed4e90a998bb1a3a9655b2 Mon Sep 17 00:00:00 2001 From: Krzysztof Rudnicki Date: Mon, 29 Dec 2025 14:41:56 +0100 Subject: [PATCH] feat: automatic language detection translation and anki generator with cache --- C/vocabulary_curve/main.c | 58 +- C/vocabulary_curve/vocabulary_curve | Bin 16768 -> 16768 bytes python_pkg/word_frequency/anki_generator.py | 321 ++- python_pkg/word_frequency/cache.py | 641 ++++++ .../word_frequency/run_anki_generator.sh | 153 ++ .../test_texts/polish_pan_tadeusz_anki_20.txt | 2040 +++++++++++++++++ .../tests/test_anki_generator.py | 63 +- .../tests/test_learning_pipe.py | 62 +- .../word_frequency/tests/test_translator.py | 330 ++- python_pkg/word_frequency/translator.py | 457 +++- 10 files changed, 3826 insertions(+), 299 deletions(-) create mode 100644 python_pkg/word_frequency/cache.py create mode 100755 python_pkg/word_frequency/run_anki_generator.sh create mode 100644 python_pkg/word_frequency/test_texts/polish_pan_tadeusz_anki_20.txt diff --git a/C/vocabulary_curve/main.c b/C/vocabulary_curve/main.c index 0a76857..f28c2c5 100644 --- a/C/vocabulary_curve/main.c +++ b/C/vocabulary_curve/main.c @@ -158,9 +158,20 @@ static void assign_ranks(void) { /* Sort all_entries by frequency (this doesn't affect word_sequence) */ qsort(all_entries, num_unique_words, sizeof(WordEntry *), compare_by_count); - /* Assign 1-indexed ranks */ + /* Assign 1-indexed ranks using competition ranking: + * Words with same frequency get same rank. + * Next rank is current_position + 1 (skipping numbers). 
+ * Example: counts 5,3,3,2 -> ranks 1,2,2,4 (not 1,2,3,4) */ for (int i = 0; i < num_unique_words; i++) { - all_entries[i]->rank = i + 1; + if (i == 0) { + all_entries[i]->rank = 1; + } else if (all_entries[i]->count == all_entries[i-1]->count) { + /* Same frequency as previous word - same rank */ + all_entries[i]->rank = all_entries[i-1]->rank; + } else { + /* Different frequency - rank is position + 1 */ + all_entries[i]->rank = i + 1; + } } } @@ -306,20 +317,42 @@ static void cleanup(void) { } } +/* Dump all vocabulary with ranks (for Python integration) */ +static void dump_vocabulary(int max_rank) { + printf("VOCAB_DUMP_START\n"); + for (int i = 0; i < num_unique_words; i++) { + if (all_entries[i]->rank <= max_rank) { + printf("%s;%d\n", all_entries[i]->word, all_entries[i]->rank); + } + } + printf("VOCAB_DUMP_END\n"); +} + int main(int argc, char *argv[]) { if (argc < 2) { - fprintf(stderr, "Usage: %s [max_length]\n", argv[0]); + fprintf(stderr, "Usage: %s [max_length] [--dump-vocab [max_rank]]\n", argv[0]); fprintf(stderr, " max_length: maximum excerpt length to analyze (default: 30)\n"); + fprintf(stderr, " --dump-vocab: output all words with ranks up to max_rank\n"); return 1; } const char *filename = argv[1]; int max_length = 30; + bool dump_vocab = false; + int dump_max_rank = 0; - if (argc >= 3) { - max_length = atoi(argv[2]); - if (max_length < 1) max_length = 1; - if (max_length > 1000) max_length = 1000; + /* Parse arguments */ + for (int i = 2; i < argc; i++) { + if (strcmp(argv[i], "--dump-vocab") == 0) { + dump_vocab = true; + if (i + 1 < argc && argv[i + 1][0] != '-') { + dump_max_rank = atoi(argv[++i]); + } + } else if (argv[i][0] != '-') { + max_length = atoi(argv[i]); + if (max_length < 1) max_length = 1; + if (max_length > 1000) max_length = 1000; + } } /* Initialize hash table */ @@ -351,6 +384,17 @@ int main(int argc, char *argv[]) { /* Print results */ print_results(results, max_length); + /* Dump vocabulary if requested */ + if 
(dump_vocab) { + /* If no max_rank specified, use the max from the excerpt */ + if (dump_max_rank == 0 && max_length > 0) { + dump_max_rank = results[max_length - 1].min_vocab_needed; + } + if (dump_max_rank > 0) { + dump_vocabulary(dump_max_rank); + } + } + /* Cleanup */ free(results); cleanup(); diff --git a/C/vocabulary_curve/vocabulary_curve b/C/vocabulary_curve/vocabulary_curve index ee232fae6092c1cd9dcfb98406f63612442771e2..873f3472a0ddea7f9d70bc65bb186b8820c759fd 100755 GIT binary patch delta 3329 zcmY*c4R93I9pAm=;F4alp(G)MgIvf0i4e$XqB)bpY}m8!=n|twOZhMjhg2m%fIG1o z*x)6OJL>|v)KUAv&~c_r>oC)XaV$D=kS}WeDomXe6zK%ZyT%|29s#1;-`j%_cjoTB z_y73)-~Z$N-)={Y?}+hx3LGELc(L$8W=l3_KP}njkNJg}xz5xw*J&>0lT$8>spa@+ z-tTCha;PLl9Usl3j?Z%oxAe|lajrK1-1_o*#YV+3rJWZ=@H-Jt_IYKx%ZQ z2CmK1{j_UQhU(7XI7Q##idhb!^BF7#WU-pbyzRhWNJ~oESidY5af((eN=IgaC=}Ik zBs@%vs-MDvW%_~(iuM&f^}S_j$BZR>*NLbk`FnS44(aU|Tpjn63!Rb49CtVqs=j2b z$3Z`WD8uA6Srx0-2}Lr;N&ZjOKL@q%6}?s*Ub1eF)Ull_*Lou7eMwuBnRo?yA#LC- zn#SY@WZ}Niw|JliyDV8~7#)7w2E74txZsHb*ds>Z>x0WV>VYnH{KqV~t66k|4b@B3 zuDHPe;FlPd%|wE3%5VLqiD}*>WtzveeA-tJ;fQ0*iYV|$el-@{xYh$Yb zkG|eDB6KxFo7oJ4uIHs;CtW9m&JZF;!c{rHQaEB{X~@rEmO*$m6C~<_u&mGd`Y8oL zIJZL~y3+~zK%LCuBE`n|NOjOqME)(7O~r724oN6~%m%%3Af)QKaqX!7NS%L0T{KN^ zkck!7mdc9%c=T*2+)vhzx``%>b#xIdu~`LS5!;7tNyaLX*UMLI5&+dCR>)dJtkqVD zr6hb>GVbLOPsY<)#R6E|O|v+nHHn019uYWRJ9C1}a_^VL0^xBdFym8iQ(dxlQnLC&R=;HRO8$}P zULERecRAJveOEUgx8fvr%-uV*d3FyE8?L^qpVVL0R*cnPLqb=L#cCvwiyD!2I78Q5 zw+UT8;yC(nYJSCy`BKb}uzIH66txU0v{FqiR%79d({Q#5){3QyQ6+BI2g7u`ss zi~b2lCI9Eb?qE8{rChx$wWo6s$Nn{}g8HK_1lT3>XJ!9cVV8P4gpK9woXy8}VXr{; z4@ts(pK2a46#i5(YR?c-(K`U^_lVnvfLq`n+_8j& zPnHb=@Kh_J$A3%=&nJo@_6tQF2tRkki$43uw~hj_F||bslHwnVo(8D~ zBx20`HBjgt&_OCsNgcyVceY0r)-1HIj7FY>PtbQ56|Rvq8Fgn=_V`ZUsI(Gu-_-n{ zg!IWI+$eXn=W$##yI;=Zpmq{YXz8DXVgs&_|4Q_9>^d*(x*su7jI6C?h_W^)8xLB4 zV`o#*K13fi2lDHkPs_4jHM6J9Q}b}X^@nA;Z`87&9JyONtr&CuU5Z9*c+OzYW2{rf z?MOWXnGmX!IsBPTW}1(J%6vX)=PVTKbJK6GfK=1U3Va(5v7)0kpwkWaX%allzp_IS 
zQjJ#8BI`%oB&}7fKuOSPfYYPt5cxJ0tw|tswBtGuI&0xZtR0s{bUHz(>g-Y^)^V~v z?$+hBbfmx9<_o)~Kol7DY?~wWSs0B4p4qB!oLZVlAQ%X3Ck7R>bqjAcU}E+8%YOzb zVOKxPvf+FJ3v?|CNBvD=b^B+)6l~Y&CSVSKVBaKNBT=_+B(WVCwAUm&LX38?v0q-$ z$kCT^cUX0GL_0>p&d(kfxIoeV$(*qJRcui;2p`gN2Ye%1Ru5Q-yMd9qsCp6&@cf5R z%0wvh7SuLq0ZGeJK||Nx%LXm?XyOqtu;m6cPpT#{*W1Mk8f3GubK@4JnCpt~C_$OV zwy?Iapyx)F6TQZ+aZl3BEbyqev1sn>J%FIBTrI7VR!fga%{?duH^6_7Ys42XJ3|Rz z6Msl`q~Cx@uY7A;-)jrdGO%^$(B>bOeF;-K$XdOgHE%91n`toeF6^=N9Y@QdJCmwq z*}XflhP6vQpXh<_<}iV(Cz+Wk%pE=Je;^%RJVARuW6CM!2aH)Wf!WKL^S~T_9l8e@ zd!psLoM5}aW*4N|2uBi6!?^G7_M3_HE_*A{6tOySXR4n2S%fo~*kbrX7ctNEz;eJU z+BIzS1kNOS*e{uc&-Nm#hS{7xVM|?1<}@`SGdvZ=cdCsQU+o|ftsc7LGTPM410bni zBrf8L7dlIED0GE>9?HbUe{6=hxWaAC2VU);+FrBk0;*`O}{J2A2YE_WmXJaGLXZ$RRHKH=3(GWuZ4`GoJ#NP%DS?Z-$ZPT*5`k%7-SQ zi`4E;i48Ha@o3Wom?^Q3zUltR^B4XlZ|*cAiQ3+k8(N-f-qNyh{p!_dW9%_+FTS_V z>zycdw|7%C(iV+)TUuMa+cs@kyVbkx@yKIdrnl7_ZSzJpd2iyRP|Tgh0dr4rM)$kL zTlm?nUs~2$K)*j?SzluGuvUz50Z#o37;CSZ50uQEejFu=lZ97|$vJFbX{=-u^Tm>R z`L93;m%jZF{I}t^chvl_Wa*?q;4jTIXNhwQ4!AP2tEKD;DZ8{j+gqPqP@kPspY4*m zSBd9bnKxhg+dSr}@`aA3S>2fnla9i(H@*aJ!n5!@xGe6Xr@PQMiRTY>7vEmQGx7R6 z8hCu7m@ie{=~$ZC{a)qEF2_IJ=9<95%qDi!@HEXbUklvJA2R_M7!yGmV?(M&r@X3}Xg?ZoP(tzp`TIY48l#!+LaM6EW)N0DeUlwc9JzqcnOW#)MA z{on8Z|NXz-d)pKhnxaB$wryR`FLORmz3L>^-&LpaEulQMIVrJhPBNAXgA>jQY&#^Xm+mmp#% zN6d(+ZRHhvKd-J_dLC(8g~ zQ0=r*@g?ZYxMr^by`uh5qKf)&wN9EtLp_S_djwK2H}|@b!#c^KcFJn_DQN*mR;#ik z`Sm3N#!}0P`qnCl#JxA*&yHRpaTe@eBf1=tlw6RZ{lX4z7|MoUwF8 zqF46JKm!Sk@&b6;d49zLnUET7LlRO@=xGP{LHOg>9+cH06>kl#ep2V<%qy!FBA8?1+c!FAE{4&BoeSmvbWDKuDHY-BN)1* z=+b50B-ygsw@ker)G8$KE0#qw9*lN4SRYutK>zd_EKAGOo8Io7i_yYm-LSd`R;h4? 
z>UkFweP)+fK#mon2DCmJcd$pff7-SoSR6)Wkj>X1QM`SPJrI2c5!LgT!2+X+NV*fK z?O18{if0vmF{ronT7ibnvM`q-Habq0vZ-DX_w>{o&aC-0NfHg!%1t{m2x(03k~0Y4 z=it1Wbb$vD^bRy$j1CLpo}VBYitd{G0J5ug%lc#HyZjg`>S?jS_<2^qzDbt7Ta43L z(<3KSa0Uo#p{UHE6?N)-SfC-{Od7Q15m}{1BsrQq{ zb5^Z_(GyN~WC+n$?clu~!P!wX-Vtc(1yH<<(Ai-CFwaSgSjP1_$)uZmoQlw=&0`;7 zQ`?q={i%t9cW`s+Ev9g-ki9XC+ogSHF><;aQ>xF5p?eQz59X>ckDLy4tYSyA|UtDJxNkS`L3JOuI8lf~=2-|L@?PT7 z_Ko2BWpO~P0D6CA;WDY_TMQ@;#fzNgF;1hoW^(OC&>mGb_~r}P>^qKa7vJW_N4HgO zdjq!lBXejFuvg&h+jvb_WEYmbL+Bj#Tg;$+W*W37e?vP5+MXU(%FR!rx_!$tXasFx zH#OsI5_G)zK1d?hdmqBxDmITGExnJxAbf9zEsAGTul&N!Q^yR>mJ8hI(*!ZU7&B#&|q`s_p!Mu|2;i!{49~|E_zlz95junq##v#ZC_xf5^|wN*=&vidV4fp~XbC zJAi}NMGeNK{IcMD5Wux<=+m}L=vb9ut(8FW47nE*un>ohm4Iy=gRK~dt(bCH zIFi;R>?^D^0)-y~NsPrs#X_p_v!W%&#A2UOSez1DUi^$ueD|hluE&1e=caiR<3ip6 ze8j2w1x97axHfg#WQLj`w|Ef8Pv8T5AQkwv8b#9d%r~Kg*U9>$_}_rv8($euN=uV} z&~KV&9mda>+XNxjWY0{ZNxQ(MRN-T4iC&*F5t`4*jF*%cGeL)Rq?XJ zwld9l%2%FxKF{)woX;~}@huS~ str: +def run_vocabulary_curve(filepath: Path, max_length: int, *, dump_vocab: bool = False) -> str: """Run the C vocabulary_curve executable. Args: filepath: Path to the text file. max_length: Maximum excerpt length. + dump_vocab: If True, also dump all vocabulary up to max rank needed. Returns: Output from the executable. 
@@ -79,8 +80,12 @@ def run_vocabulary_curve(filepath: Path, max_length: int) -> str: "Please compile it first: cd C/vocabulary_curve && make" ) + cmd = [str(C_EXECUTABLE), str(filepath), str(max_length)] + if dump_vocab: + cmd.append("--dump-vocab") + result = subprocess.run( - [str(C_EXECUTABLE), str(filepath), str(max_length)], + cmd, capture_output=True, text=True, timeout=120, @@ -89,7 +94,7 @@ def run_vocabulary_curve(filepath: Path, max_length: int) -> str: return result.stdout -def parse_vocabulary_curve_output(output: str, target_length: int) -> tuple[str, list[tuple[str, int]]]: +def parse_vocabulary_curve_output(output: str, target_length: int) -> tuple[str, list[tuple[str, int]], list[tuple[str, int]]]: """Parse output from vocabulary_curve to get words needed. Args: @@ -97,11 +102,14 @@ def parse_vocabulary_curve_output(output: str, target_length: int) -> tuple[str, target_length: The target excerpt length. Returns: - Tuple of (excerpt_text, list of (word, rank) tuples). + Tuple of (excerpt_text, excerpt_words, all_vocab_words). + excerpt_words: words in the excerpt with their ranks. + all_vocab_words: all words up to max rank (from VOCAB_DUMP if present). """ lines = output.split("\n") excerpt = "" - words: list[tuple[str, int]] = [] + excerpt_words: list[tuple[str, int]] = [] + all_vocab: list[tuple[str, int]] = [] # Find the line for the target length i = 0 @@ -131,26 +139,28 @@ def parse_vocabulary_curve_output(output: str, target_length: int) -> tuple[str, # Parse "word(#rank), word2(#rank2), ..." 
pattern = r"(\S+)\(#(\d+)\)" matches = re.findall(pattern, words_part) - words = [(w, int(r)) for w, r in matches] + excerpt_words = [(w, int(r)) for w, r in matches] break i += 1 - return excerpt, words + # Parse VOCAB_DUMP section if present + in_vocab_dump = False + for line in lines: + if line.strip() == "VOCAB_DUMP_START": + in_vocab_dump = True + continue + if line.strip() == "VOCAB_DUMP_END": + break + if in_vocab_dump and ";" in line: + parts = line.strip().split(";") + if len(parts) == 2: + word, rank_str = parts + try: + all_vocab.append((word, int(rank_str))) + except ValueError: + pass - -def get_top_n_words(text: str, n: int) -> list[tuple[str, int]]: - """Get the top N most frequent words from text. - - Args: - text: The source text. - n: Number of top words to return. - - Returns: - List of (word, rank) tuples, ranked 1 to n. - """ - word_counts = analyze_text(text) - sorted_words = sorted(word_counts.items(), key=lambda x: (-x[1], x[0])) - return [(word, rank + 1) for rank, (word, _) in enumerate(sorted_words[:n])] + return excerpt, excerpt_words, all_vocab def find_word_contexts( @@ -196,6 +206,8 @@ def generate_anki_deck( deck_name: str = "Vocabulary", include_context: bool = False, no_translate: bool = False, + excerpt: str = "", + excerpt_words: list[tuple[str, int]] | None = None, ) -> str: """Generate Anki-compatible deck content. @@ -207,6 +219,8 @@ def generate_anki_deck( deck_name: Name for the deck. include_context: Whether to include context in cards. no_translate: If True, skip translation (use placeholder). + excerpt: The target excerpt text to include in cards. + excerpt_words: List of (word, rank) tuples for words in the excerpt. Returns: Semicolon-separated content ready for Anki import. 
@@ -224,6 +238,27 @@ def generate_anki_deck( lines.append("#columns:Front;Back;Rank") lines.append("") # Empty line before data + # Add excerpt as first card (goal/context card) + if excerpt: + excerpt_escaped = excerpt.replace(";", ",") + # Use excerpt_words from C output (has correct ranks) + if excerpt_words: + # Most frequent = lowest rank (italics), rarest = highest rank (bold) + most_frequent = min(excerpt_words, key=lambda x: x[1])[0] + rarest = max(excerpt_words, key=lambda x: x[1])[0] + # Apply formatting - rarest first (bold), then most frequent (italics) + # to avoid nested tag issues if they're the same word + if most_frequent != rarest: + pattern_rare = re.compile(rf"\b({re.escape(rarest)})\b", re.IGNORECASE) + excerpt_escaped = pattern_rare.sub(r"\1", excerpt_escaped) + pattern_freq = re.compile(rf"\b({re.escape(most_frequent)})\b", re.IGNORECASE) + excerpt_escaped = pattern_freq.sub(r"\1", excerpt_escaped) + else: + # Same word is both most and least frequent - use bold+italic + pattern = re.compile(rf"\b({re.escape(most_frequent)})\b", re.IGNORECASE) + excerpt_escaped = pattern.sub(r"\1", excerpt_escaped) + lines.append(f"📖 TARGET EXCERPT;{excerpt_escaped};#0") + # Get translations (or skip if no_translate) words = [w for w, _ in words_with_ranks] if no_translate: @@ -263,6 +298,120 @@ def generate_anki_deck( return "\n".join(lines) +def get_cached_excerpt( + filepath: Path, length: int, *, force: bool = False +) -> tuple[str, list[tuple[str, int]]] | None: + """Get cached excerpt if available. + + Args: + filepath: Path to source file. + length: Excerpt length. + force: If True, ignore cache. + + Returns: + Tuple of (excerpt, words) or None if not cached. 
+ """ + if force: + return None + try: + from python_pkg.word_frequency.cache import get_vocab_curve_cache + return get_vocab_curve_cache().get(filepath, length) + except ImportError: + return None + + +def cache_excerpt( + filepath: Path, length: int, excerpt: str, words: list[tuple[str, int]] +) -> None: + """Store excerpt in cache. + + Args: + filepath: Path to source file. + length: Excerpt length. + excerpt: The excerpt text. + words: List of (word, rank) tuples. + """ + try: + from python_pkg.word_frequency.cache import get_vocab_curve_cache + get_vocab_curve_cache().set(filepath, length, excerpt, words) + except ImportError: + pass + + +def get_cached_deck( + filepath: Path, + length: int, + target_lang: str, + include_context: bool, + all_vocab: bool, + *, + force: bool = False, +) -> tuple[str, str, int, int] | None: + """Get cached Anki deck if available. + + Args: + filepath: Path to source file. + length: Excerpt length. + target_lang: Target language. + include_context: Whether context is included. + all_vocab: Whether all vocab is included. + force: If True, ignore cache. + + Returns: + Tuple of (content, excerpt, num_words, max_rank) or None. + """ + if force: + return None + try: + from python_pkg.word_frequency.cache import get_anki_deck_cache + return get_anki_deck_cache().get( + filepath, length, target_lang, include_context, all_vocab + ) + except ImportError: + return None + + +def cache_deck( + filepath: Path, + length: int, + target_lang: str, + include_context: bool, + all_vocab: bool, + anki_content: str, + excerpt: str, + num_words: int, + max_rank: int, +) -> None: + """Store Anki deck in cache. + + Args: + filepath: Path to source file. + length: Excerpt length. + target_lang: Target language. + include_context: Whether context is included. + all_vocab: Whether all vocab is included. + anki_content: The deck content. + excerpt: The excerpt text. + num_words: Number of words. + max_rank: Maximum rank. 
+ """ + try: + from python_pkg.word_frequency.cache import get_anki_deck_cache + get_anki_deck_cache().set( + filepath, + length, + target_lang, + include_context, + all_vocab, + anki_content, + excerpt, + num_words, + max_rank, + ) + except ImportError: + pass + + def generate_flashcards( filepath: str | Path, excerpt_length: int, @@ -272,6 +421,8 @@ def generate_flashcards( deck_name: str | None = None, all_vocab: bool = True, no_translate: bool = False, + *, + force: bool = False, ) -> tuple[str, str, int, int]: """Generate Anki flashcards for vocabulary needed for an excerpt length. @@ -285,26 +436,39 @@ def generate_flashcards( all_vocab: If True, include ALL words from rank 1 to max rank needed. If False, only include words that appear in the excerpt. no_translate: If True, skip translation. + force: If True, ignore all caches and regenerate. Returns: Tuple of (anki_content, excerpt, num_words, max_rank). """ filepath = Path(filepath) - # Read the text - text = read_file(filepath) + # Check for cached full deck (if not using no_translate) + if not no_translate and not force: + cached = get_cached_deck( + filepath, excerpt_length, target_lang, include_context, all_vocab + ) + if cached is not None: + return cached + + # Read the text (only needed for context finding) + text = read_file(filepath) if include_context else "" # Auto-detect language if not provided if source_lang is None: - source_lang = detect_language(text) + sample_text = read_file(filepath)[:1000] if not text else text[:1000] + source_lang = detect_language(sample_text) if source_lang is None: - source_lang = "auto" + raise ValueError( + "Could not auto-detect source language. " + "Please specify with --from (e.g., --from pl for Polish). 
" + "Install langdetect for auto-detection: pip install langdetect" + ) - # Run vocabulary curve analysis - output = run_vocabulary_curve(filepath, excerpt_length) - - # Parse the output - excerpt, excerpt_words = parse_vocabulary_curve_output(output, excerpt_length) + # Run vocabulary curve analysis with vocab dump for all words + output = run_vocabulary_curve(filepath, excerpt_length, dump_vocab=all_vocab) + # Parse the output (now includes all vocabulary from C) + excerpt, excerpt_words, all_vocab_words = parse_vocabulary_curve_output(output, excerpt_length) if not excerpt_words: raise ValueError(f"No words found for excerpt length {excerpt_length}") @@ -312,15 +476,17 @@ def generate_flashcards( # Find max rank needed max_rank = max(rank for _, rank in excerpt_words) - # Get ALL words up to max_rank if requested - if all_vocab: - words_with_ranks = get_top_n_words(text, max_rank) + # Use vocabulary from C output + if all_vocab and all_vocab_words: + words_with_ranks = all_vocab_words else: words_with_ranks = excerpt_words # Get contexts if requested contexts = None if include_context: + if not text: + text = read_file(filepath) words = [w for w, _ in words_with_ranks] contexts = find_word_contexts(text, words) @@ -337,8 +503,24 @@ def generate_flashcards( deck_name, include_context, no_translate, + excerpt, + excerpt_words, ) + # Cache the full deck (if translated) + if not no_translate: + cache_deck( + filepath, + excerpt_length, + target_lang, + include_context, + all_vocab, + anki_content, + excerpt, + len(words_with_ranks), + max_rank, + ) + return anki_content, excerpt, len(words_with_ranks), max_rank @@ -361,19 +543,18 @@ def main(argv: Sequence[str] | None = None) -> int: "--file", "-f", type=str, - required=True, + default=None, help="Path to the text file to analyze", ) parser.add_argument( "--length", "-l", type=int, - required=True, + default=None, help="Target excerpt length (how many words you want to understand)", ) parser.add_argument( "--from", 
- "-F", dest="source_lang", type=str, default=None, @@ -425,9 +606,72 @@ def main(argv: Sequence[str] | None = None) -> int: action="store_true", help="Skip translation (output words without translations)", ) + parser.add_argument( + "--force", + "-F", + action="store_true", + help="Force regeneration, ignoring all caches", + ) + parser.add_argument( + "--cache-stats", + action="store_true", + help="Show cache statistics and exit", + ) + parser.add_argument( + "--clear-cache", + action="store_true", + help="Clear all caches and exit", + ) args = parser.parse_args(argv) + # Handle cache management commands + if args.cache_stats: + try: + from python_pkg.word_frequency.cache import get_all_cache_stats + except ImportError: + try: + from cache import get_all_cache_stats + except ImportError: + print("Cache module not available", file=sys.stderr) # noqa: T201 + return 1 + stats = get_all_cache_stats() + print("Cache Statistics") # noqa: T201 + print("=" * 50) # noqa: T201 + for cache_name, cache_stats in stats.items(): + print(f"\n{cache_name.upper()}:") # noqa: T201 + for key, value in cache_stats.items(): + if key == "cache_size_bytes": + if value < 1024: + size_str = f"{value} B" + elif value < 1024 * 1024: + size_str = f"{value / 1024:.1f} KB" + else: + size_str = f"{value / (1024 * 1024):.1f} MB" + print(f" {key}: {size_str}") # noqa: T201 + else: + print(f" {key}: {value}") # noqa: T201 + return 0 + + if args.clear_cache: + try: + from python_pkg.word_frequency.cache import clear_all_caches + except ImportError: + try: + from cache import clear_all_caches + except ImportError: + print("Cache module not available", file=sys.stderr) # noqa: T201 + return 1 + clear_all_caches() + print("All caches cleared.") # noqa: T201 + return 0 + + # Validate required arguments for main functionality + if args.file is None: + parser.error("--file/-f is required") + if args.length is None: + parser.error("--length/-l is required") + try: filepath = Path(args.file) if not 
filepath.exists(): @@ -448,6 +692,7 @@ def main(argv: Sequence[str] | None = None) -> int: deck_name=args.deck_name, all_vocab=not args.excerpt_words_only, no_translate=args.no_translate, + force=args.force, ) # Determine output path diff --git a/python_pkg/word_frequency/cache.py b/python_pkg/word_frequency/cache.py new file mode 100644 index 0000000..670f7b8 --- /dev/null +++ b/python_pkg/word_frequency/cache.py @@ -0,0 +1,641 @@ +#!/usr/bin/env python3 +"""Caching utilities for word frequency analysis. + +Provides disk-based caching for: +- Translations (word -> translation mappings) +- Vocabulary curve excerpts (file + length -> excerpt + words) +- Generated Anki decks + +Cache location: ~/.cache/word_frequency/ +""" + +from __future__ import annotations + +import hashlib +import json +import os +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + pass + +# Default cache directory +DEFAULT_CACHE_DIR = Path.home() / ".cache" / "word_frequency" + + +def get_cache_dir() -> Path: + """Get the cache directory, creating it if needed. + + Returns: + Path to cache directory. + """ + cache_dir = Path(os.environ.get("WORD_FREQ_CACHE_DIR", str(DEFAULT_CACHE_DIR))) + cache_dir.mkdir(parents=True, exist_ok=True) + return cache_dir + + +def get_file_hash(filepath: Path) -> str: + """Compute SHA256 hash of a file's contents. + + Args: + filepath: Path to file. + + Returns: + Hex digest of file hash. + """ + hasher = hashlib.sha256() + with open(filepath, "rb") as f: + # Read in chunks for large files + for chunk in iter(lambda: f.read(65536), b""): + hasher.update(chunk) + return hasher.hexdigest() + + +def get_text_hash(text: str) -> str: + """Compute SHA256 hash of text content. + + Args: + text: Text to hash. + + Returns: + Hex digest of text hash. 
+ """ + return hashlib.sha256(text.encode("utf-8")).hexdigest() + + +# ============================================================================= +# Translation Cache +# ============================================================================= + + +class TranslationCache: + """Cache for word translations.""" + + def __init__(self, cache_dir: Path | None = None) -> None: + """Initialize translation cache. + + Args: + cache_dir: Optional custom cache directory. + """ + self.cache_dir = cache_dir or get_cache_dir() + self.cache_file = self.cache_dir / "translations.json" + self._cache: dict[str, str] | None = None + self._dirty = False # Track if cache needs saving + + def _load_cache(self) -> dict[str, str]: + """Load cache from disk.""" + if self._cache is None: + if self.cache_file.exists(): + try: + self._cache = json.loads(self.cache_file.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + self._cache = {} + else: + self._cache = {} + return self._cache + + def _save_cache(self) -> None: + """Save cache to disk if dirty.""" + if self._cache is not None and self._dirty: + self.cache_file.write_text( + json.dumps(self._cache, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + self._dirty = False + + def flush(self) -> None: + """Force save cache to disk.""" + self._save_cache() + + @staticmethod + def _make_key(word: str, source_lang: str, target_lang: str) -> str: + """Create cache key for a translation. + + Args: + word: Word to translate. + source_lang: Source language code. + target_lang: Target language code. + + Returns: + Cache key string. + """ + return f"{source_lang}:{target_lang}:{word.lower()}" + + def get( + self, word: str, source_lang: str, target_lang: str + ) -> str | None: + """Get cached translation. + + Args: + word: Word to look up. + source_lang: Source language code. + target_lang: Target language code. + + Returns: + Cached translation or None if not found. 
+ """ + cache = self._load_cache() + key = self._make_key(word, source_lang, target_lang) + return cache.get(key) + + def set( + self, word: str, source_lang: str, target_lang: str, translation: str, + *, auto_save: bool = False, + ) -> None: + """Store translation in cache. + + Args: + word: Original word. + source_lang: Source language code. + target_lang: Target language code. + translation: Translated word. + auto_save: If True, save to disk immediately. + """ + cache = self._load_cache() + key = self._make_key(word, source_lang, target_lang) + cache[key] = translation + self._dirty = True + if auto_save: + self._save_cache() + + def get_many( + self, words: list[str], source_lang: str, target_lang: str + ) -> dict[str, str]: + """Get multiple cached translations. + + Args: + words: Words to look up. + source_lang: Source language code. + target_lang: Target language code. + + Returns: + Dict mapping words to their cached translations. + """ + cache = self._load_cache() + result: dict[str, str] = {} + for word in words: + key = self._make_key(word, source_lang, target_lang) + if key in cache: + result[word.lower()] = cache[key] + return result + + def set_many( + self, + translations: dict[str, str], + source_lang: str, + target_lang: str, + ) -> None: + """Store multiple translations in cache and save to disk. + + Args: + translations: Dict mapping words to translations. + source_lang: Source language code. + target_lang: Target language code. + """ + cache = self._load_cache() + for word, translation in translations.items(): + key = self._make_key(word, source_lang, target_lang) + cache[key] = translation + self._dirty = True + self._save_cache() # Save once after all additions + + def clear(self) -> None: + """Clear all cached translations.""" + self._cache = {} + self._dirty = False + if self.cache_file.exists(): + self.cache_file.unlink() + + def stats(self) -> dict[str, Any]: + """Get cache statistics. + + Returns: + Dict with cache stats. 
+ """ + cache = self._load_cache() + return { + "total_entries": len(cache), + "cache_file": str(self.cache_file), + "cache_size_bytes": ( + self.cache_file.stat().st_size if self.cache_file.exists() else 0 + ), + } + + +# ============================================================================= +# Vocabulary Curve Cache +# ============================================================================= + + +class VocabCurveCache: + """Cache for vocabulary curve analysis results.""" + + def __init__(self, cache_dir: Path | None = None) -> None: + """Initialize vocabulary curve cache. + + Args: + cache_dir: Optional custom cache directory. + """ + self.cache_dir = (cache_dir or get_cache_dir()) / "excerpts" + self.cache_dir.mkdir(parents=True, exist_ok=True) + + def _get_cache_path(self, file_hash: str, length: int) -> Path: + """Get path to cache file for given hash and length. + + Args: + file_hash: Hash of source file. + length: Excerpt length. + + Returns: + Path to cache file. + """ + return self.cache_dir / f"{file_hash[:16]}_{length}.json" + + def get( + self, filepath: Path, length: int + ) -> tuple[str, list[tuple[str, int]]] | None: + """Get cached excerpt and words for a file and length. + + Args: + filepath: Path to source file. + length: Excerpt length. + + Returns: + Tuple of (excerpt, words_with_ranks) or None if not cached. + """ + file_hash = get_file_hash(filepath) + cache_path = self._get_cache_path(file_hash, length) + + if not cache_path.exists(): + return None + + try: + data = json.loads(cache_path.read_text(encoding="utf-8")) + # Verify hash matches + if data.get("file_hash") != file_hash: + return None + excerpt = data["excerpt"] + words = [(w, r) for w, r in data["words"]] + return excerpt, words + except (json.JSONDecodeError, KeyError, OSError): + return None + + def set( + self, + filepath: Path, + length: int, + excerpt: str, + words: list[tuple[str, int]], + ) -> None: + """Store excerpt and words in cache. 
+ + Args: + filepath: Path to source file. + length: Excerpt length. + excerpt: The excerpt text. + words: List of (word, rank) tuples. + """ + file_hash = get_file_hash(filepath) + cache_path = self._get_cache_path(file_hash, length) + + data = { + "file_hash": file_hash, + "filepath": str(filepath), + "length": length, + "excerpt": excerpt, + "words": [[w, r] for w, r in words], + } + + cache_path.write_text( + json.dumps(data, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + def clear(self) -> None: + """Clear all cached excerpts.""" + for cache_file in self.cache_dir.glob("*.json"): + cache_file.unlink() + + def stats(self) -> dict[str, Any]: + """Get cache statistics. + + Returns: + Dict with cache stats. + """ + cache_files = list(self.cache_dir.glob("*.json")) + total_size = sum(f.stat().st_size for f in cache_files) + return { + "total_entries": len(cache_files), + "cache_dir": str(self.cache_dir), + "cache_size_bytes": total_size, + } + + +# ============================================================================= +# Anki Deck Cache +# ============================================================================= + + +class AnkiDeckCache: + """Cache for generated Anki decks.""" + + def __init__(self, cache_dir: Path | None = None) -> None: + """Initialize Anki deck cache. + + Args: + cache_dir: Optional custom cache directory. 
+ """ + self.cache_dir = (cache_dir or get_cache_dir()) / "anki_decks" + self.cache_dir.mkdir(parents=True, exist_ok=True) + self.metadata_file = self.cache_dir / "metadata.json" + self._metadata: dict[str, Any] | None = None + + def _load_metadata(self) -> dict[str, Any]: + """Load metadata from disk.""" + if self._metadata is None: + if self.metadata_file.exists(): + try: + self._metadata = json.loads( + self.metadata_file.read_text(encoding="utf-8") + ) + except (json.JSONDecodeError, OSError): + self._metadata = {} + else: + self._metadata = {} + return self._metadata + + def _save_metadata(self) -> None: + """Save metadata to disk.""" + if self._metadata is not None: + self.metadata_file.write_text( + json.dumps(self._metadata, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + @staticmethod + def _make_key( + file_hash: str, + length: int, + target_lang: str, + include_context: bool, + all_vocab: bool, + ) -> str: + """Create cache key for an Anki deck. + + Args: + file_hash: Hash of source file. + length: Excerpt length. + target_lang: Target language. + include_context: Whether context is included. + all_vocab: Whether all vocab is included. + + Returns: + Cache key string. + """ + flags = f"ctx{int(include_context)}_all{int(all_vocab)}" + return f"{file_hash[:16]}_{length}_{target_lang}_{flags}" + + def get( + self, + filepath: Path, + length: int, + target_lang: str, + include_context: bool, + all_vocab: bool, + ) -> tuple[str, str, int, int] | None: + """Get cached Anki deck. + + Args: + filepath: Path to source file. + length: Excerpt length. + target_lang: Target language. + include_context: Whether context is included. + all_vocab: Whether all vocab is included. + + Returns: + Tuple of (anki_content, excerpt, num_words, max_rank) or None. 
+ """ + file_hash = get_file_hash(filepath) + key = self._make_key(file_hash, length, target_lang, include_context, all_vocab) + metadata = self._load_metadata() + + if key not in metadata: + return None + + entry = metadata[key] + if entry.get("file_hash") != file_hash: + return None + + deck_file = self.cache_dir / f"{key}.txt" + if not deck_file.exists(): + return None + + try: + content = deck_file.read_text(encoding="utf-8") + return ( + content, + entry["excerpt"], + entry["num_words"], + entry["max_rank"], + ) + except OSError: + return None + + def set( + self, + filepath: Path, + length: int, + target_lang: str, + include_context: bool, + all_vocab: bool, + anki_content: str, + excerpt: str, + num_words: int, + max_rank: int, + ) -> None: + """Store Anki deck in cache. + + Args: + filepath: Path to source file. + length: Excerpt length. + target_lang: Target language. + include_context: Whether context is included. + all_vocab: Whether all vocab is included. + anki_content: The Anki deck content. + excerpt: The excerpt text. + num_words: Number of words in deck. + max_rank: Maximum word rank. + """ + file_hash = get_file_hash(filepath) + key = self._make_key(file_hash, length, target_lang, include_context, all_vocab) + + # Save deck content + deck_file = self.cache_dir / f"{key}.txt" + deck_file.write_text(anki_content, encoding="utf-8") + + # Update metadata + metadata = self._load_metadata() + metadata[key] = { + "file_hash": file_hash, + "filepath": str(filepath), + "length": length, + "target_lang": target_lang, + "include_context": include_context, + "all_vocab": all_vocab, + "excerpt": excerpt, + "num_words": num_words, + "max_rank": max_rank, + } + self._save_metadata() + + def clear(self) -> None: + """Clear all cached decks.""" + self._metadata = {} + for cache_file in self.cache_dir.glob("*.txt"): + cache_file.unlink() + if self.metadata_file.exists(): + self.metadata_file.unlink() + + def stats(self) -> dict[str, Any]: + """Get cache statistics. 
# =============================================================================
# Global Cache Instances
# =============================================================================

# Process-wide singletons. Each starts as None and is created by its getter
# the first time that getter is called.
_translation_cache: TranslationCache | None = None
_vocab_curve_cache: VocabCurveCache | None = None
_anki_deck_cache: AnkiDeckCache | None = None


def get_translation_cache() -> TranslationCache:
    """Return the shared translation cache, creating it on first use."""
    global _translation_cache  # noqa: PLW0603
    if _translation_cache is None:
        _translation_cache = TranslationCache()
    return _translation_cache


def get_vocab_curve_cache() -> VocabCurveCache:
    """Return the shared vocabulary curve cache, creating it on first use."""
    global _vocab_curve_cache  # noqa: PLW0603
    if _vocab_curve_cache is None:
        _vocab_curve_cache = VocabCurveCache()
    return _vocab_curve_cache


def get_anki_deck_cache() -> AnkiDeckCache:
    """Return the shared Anki deck cache, creating it on first use."""
    global _anki_deck_cache  # noqa: PLW0603
    if _anki_deck_cache is None:
        _anki_deck_cache = AnkiDeckCache()
    return _anki_deck_cache


def clear_all_caches() -> None:
    """Clear the translation, vocabulary curve and Anki deck caches."""
    for get_cache in (
        get_translation_cache,
        get_vocab_curve_cache,
        get_anki_deck_cache,
    ):
        get_cache().clear()


def get_all_cache_stats() -> dict[str, dict[str, Any]]:
    """Collect ``stats()`` from every cache.

    Returns:
        Dict keyed by ``"translations"``, ``"vocab_curves"`` and
        ``"anki_decks"``, each holding that cache's stats dict.
    """
    return {
        "translations": get_translation_cache().stats(),
        "vocab_curves": get_vocab_curve_cache().stats(),
        "anki_decks": get_anki_deck_cache().stats(),
    }


def _format_size(num_bytes: int) -> str:
    """Render a byte count as ``B``, ``KB`` or ``MB`` (one decimal place)."""
    if num_bytes < 1024:
        return f"{num_bytes} B"
    if num_bytes < 1024 * 1024:
        return f"{num_bytes / 1024:.1f} KB"
    return f"{num_bytes / (1024 * 1024):.1f} MB"


def _print_cache_stats() -> None:
    """Print the statistics of every cache in a human-readable layout."""
    print("Cache Statistics")  # noqa: T201
    print("=" * 50)  # noqa: T201
    for cache_name, cache_stats in get_all_cache_stats().items():
        print(f"\n{cache_name.upper()}:")  # noqa: T201
        for key, value in cache_stats.items():
            shown = _format_size(value) if key == "cache_size_bytes" else value
            print(f" {key}: {shown}")  # noqa: T201


def main(argv: list[str] | None = None) -> int:
    """CLI for cache management.

    Every requested ``--clear-*`` flag is honoured, so several selective
    flags can be combined in one invocation. ``--clear`` clears everything
    and therefore supersedes the selective flags. Statistics are printed
    when no clear flag is given, or when ``--stats`` is passed explicitly
    alongside one.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        Exit code (always 0; argparse itself exits on bad arguments).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Manage word frequency caches")
    parser.add_argument(
        "--stats", action="store_true", help="Show cache statistics"
    )
    parser.add_argument(
        "--clear", action="store_true", help="Clear all caches"
    )
    parser.add_argument(
        "--clear-translations", action="store_true", help="Clear translation cache"
    )
    parser.add_argument(
        "--clear-excerpts", action="store_true", help="Clear excerpt cache"
    )
    parser.add_argument(
        "--clear-anki", action="store_true", help="Clear Anki deck cache"
    )

    args = parser.parse_args(argv)

    cleared = False
    if args.clear:
        clear_all_caches()
        print("All caches cleared.")  # noqa: T201
        cleared = True
    else:
        # Walk every selective flag instead of returning after the first
        # match, so combined flags are not silently dropped.
        selective = (
            (args.clear_translations, get_translation_cache, "Translation cache cleared."),
            (args.clear_excerpts, get_vocab_curve_cache, "Excerpt cache cleared."),
            (args.clear_anki, get_anki_deck_cache, "Anki deck cache cleared."),
        )
        for requested, get_cache, message in selective:
            if requested:
                get_cache().clear()
                print(message)  # noqa: T201
                cleared = True

    # Default action is to show stats; after a clear they are shown only
    # when the user asked for them explicitly.
    if cleared and not args.stats:
        return 0

    _print_cache_stats()
    return 0


if __name__ == "__main__":
    import sys

    sys.exit(main())
#!/bin/bash
# Wrapper script for anki_generator that ensures argostranslate is available
#
# Usage: ./run_anki_generator.sh [anki_generator args...]
# Example: ./run_anki_generator.sh --file text.txt --length 20 --from pl --to en

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Use /tmp for venv to avoid home directory quota issues
# NOTE(review): this is a predictable path in a world-writable directory; on a
# shared host another local user could pre-create it. Consider a private
# cache directory if that matters for your deployment.
VENV_DIR="/tmp/.venv_argos_$(id -u)"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

log_info() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

log_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Store original args before any directory changes
ORIGINAL_ARGS=("$@")

# Arguments with --file/-f paths made absolute; filled by resolve_file_paths.
# Kept as an array so arguments containing spaces or glob characters survive.
RESOLVED_ARGS=()

# Print the absolute form of $1 if it names an existing file, else $1 as-is.
absolute_path() {
    local file="$1"
    if [[ -f "$file" ]]; then
        file="$(cd "$(dirname "$file")" && pwd)/$(basename "$file")"
    fi
    printf '%s' "$file"
}

# Convert relative file paths to absolute before changing directories.
# Handles "--file PATH", "-f PATH" and "--file=PATH"; everything else is
# copied through untouched. The result is stored in RESOLVED_ARGS.
resolve_file_paths() {
    RESOLVED_ARGS=()
    local expect_path=false
    local arg
    for arg in "${ORIGINAL_ARGS[@]}"; do
        if [[ "$expect_path" == true ]]; then
            RESOLVED_ARGS+=("$(absolute_path "$arg")")
            expect_path=false
            continue
        fi
        case "$arg" in
            --file|-f)
                RESOLVED_ARGS+=("$arg")
                expect_path=true
                ;;
            --file=*)
                RESOLVED_ARGS+=("--file=$(absolute_path "${arg#--file=}")")
                ;;
            *)
                RESOLVED_ARGS+=("$arg")
                ;;
        esac
    done
}

# Succeed if --no-translate (or -n) was passed on the command line.
wants_no_translate() {
    local arg
    for arg in "${ORIGINAL_ARGS[@]}"; do
        if [[ "$arg" == "--no-translate" || "$arg" == "-n" ]]; then
            return 0
        fi
    done
    return 1
}

# Check if argostranslate is available
check_argos() {
    python -c "import argostranslate" 2>/dev/null
}

# Try to install argostranslate using pipx (system-wide)
try_pipx_install() {
    if command -v pipx &>/dev/null; then
        log_info "Trying pipx install argostranslate..."
        if pipx install argostranslate 2>/dev/null; then
            log_info "argostranslate installed via pipx"
            return 0
        fi
    fi
    return 1
}

# Create/use a virtualenv for argostranslate
setup_venv() {
    # Use /tmp for pip cache to avoid home directory quota issues
    export PIP_CACHE_DIR="/tmp/.pip_cache_$(id -u)"
    mkdir -p "$PIP_CACHE_DIR"

    if [[ ! -d "$VENV_DIR" ]]; then
        log_info "Creating virtual environment at $VENV_DIR..."
        python -m venv "$VENV_DIR"
    fi

    # Activate venv
    source "$VENV_DIR/bin/activate"

    # Install argostranslate if not present
    if ! python -c "import argostranslate" 2>/dev/null; then
        log_info "Installing argostranslate in virtualenv (this may take a few minutes)..."
        # Use CPU-only PyTorch to reduce download size significantly (~200MB vs ~900MB)
        # Use --no-cache-dir to avoid any cache writes to home directory
        pip install --progress-bar on --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
        pip install --progress-bar on --no-cache-dir argostranslate
    fi

    # Install langdetect for auto language detection
    if ! python -c "import langdetect" 2>/dev/null; then
        log_info "Installing langdetect for auto language detection..."
        pip install --progress-bar on --no-cache-dir langdetect
    fi

    # Also ensure other dependencies are available (best effort)
    if [[ -f "${SCRIPT_DIR}/../../requirements.txt" ]]; then
        pip install --progress-bar on --no-cache-dir -r "${SCRIPT_DIR}/../../requirements.txt" 2>/dev/null || true
    fi

    log_info "Using virtualenv: $VENV_DIR"
}

# Run anki_generator from the repository root with the resolved arguments.
run_generator() {
    # A single cd so that a failure is caught by `set -e`.
    cd "$SCRIPT_DIR/../.."
    python -m python_pkg.word_frequency.anki_generator "${RESOLVED_ARGS[@]}"
}

# Main logic
main() {
    # Resolve file paths to absolute before changing directories
    resolve_file_paths

    # If --no-translate is passed, we don't need argostranslate
    if wants_no_translate; then
        log_info "Running without translation (--no-translate)"
        run_generator
        return
    fi

    # Check if argostranslate is already available
    if check_argos; then
        log_info "argostranslate is available"
        run_generator
        return
    fi

    log_warn "argostranslate not found in system Python"

    # Try pipx first (cleaner system-wide installation)
    if try_pipx_install && check_argos; then
        run_generator
        return
    fi

    # Fall back to virtualenv
    log_info "Setting up virtualenv with argostranslate..."
    setup_venv

    # Run in venv context
    run_generator
}

main "$@"
+ fi + + log_warn "argostranslate not found in system Python" + + # Try pipx first (cleaner system-wide installation) + if try_pipx_install && check_argos; then + cd "$(dirname "$SCRIPT_DIR")" && cd .. + python -m python_pkg.word_frequency.anki_generator $resolved_args + exit $? + fi + + # Fall back to virtualenv + log_info "Setting up virtualenv with argostranslate..." + setup_venv + + # Run in venv context + cd "$(dirname "$SCRIPT_DIR")" && cd .. + python -m python_pkg.word_frequency.anki_generator $resolved_args +} + +main "$@" diff --git a/python_pkg/word_frequency/test_texts/polish_pan_tadeusz_anki_20.txt b/python_pkg/word_frequency/test_texts/polish_pan_tadeusz_anki_20.txt new file mode 100644 index 0000000..5ee09a7 --- /dev/null +++ b/python_pkg/word_frequency/test_texts/polish_pan_tadeusz_anki_20.txt @@ -0,0 +1,2040 @@ +#separator:semicolon +#html:true +#deck:polish_pan_tadeusz_vocab_20 +#tags:vocabulary pl +#columns:Front;Back;Rank + +📖 TARGET EXCERPT;z oczu szukano gdzie się pod stół schował gdy nagle z drugiej strony wyszedł jak spod ziemi podniósłszy w górę;#0 +i;and;#1 +w;In;#2 +się;au;#3 +z;ed;#4 +na;au;#5 +nie;no;#6 +—;—;#7 +jak;how to;#8 +do;au;#9 +a;a;#10 +to;This;#11 +że;That;#12 +o;o;#13 +za;for;#14 +po;au;#15 +już;Now.;#16 +tak;Yes;#17 +co;a;#18 +od;from;#19 +»;»;#20 +lecz;but;#21 +bo;because;#22 +gdy;when;#23 +pan;Mr;#24 +ja;me;#24 +jest;is;#26 +ale;but;#27 +był;was;#28 +nim;dic;#29 +rzekł;He said;#30 +go;dic;#31 +tylko;only;#32 +jako;as;#33 +mu;au;#34 +Że;That;#35 +mnie;me;#36 +tu;Here.;#37 +on;he;#37 +ten;is;#39 +czy;is;#40 +hrabia;Count;#41 +sędzia;Judge;#42 +tam;There.;#43 +pod;under;#44 +aż;au;#45 +dla;for;#45 +u;au;#47 +nad;over;#48 +więc;Well;#49 +ich;their;#49 +tadeusz;tadeus;#51 +tym;with;#52 +przed;before;#53 +przy;with;#54 +sam;alone;#54 +przez;au;#56 +ze;ed;#56 +jeszcze;more;#58 +kto;who;#59 +gdzie;where;#59 +bez;without;#59 +jej;her;#59 +ku;the;#63 +wszyscy;All;#63 +wojski;military;#65 +choć;Come on.;#66 +było;was;#67 
+potem;Then;#68 +miał;had;#69 +mi;me;#69 +jego;his;#71 +teraz;Now;#71 +ją;her;#73 +by;by;#74 +oczy;eyes;#74 +dziś;Today;#76 +może;may;#77 +domu;home;#77 +kiedy;when;#79 +ma;has;#80 +który;which;#81 +nawet;even;#82 +znowu;again;#82 +nas;us;#84 +jakby;like;#85 +jeśli;if;#85 +wszystko;everything;#87 +raz;once;#88 +szlachta;nobles;#89 +niech;Let;#89 +we;au;#91 +tej;this;#91 +lub;or;#91 +też;also;#94 +sobie;I'm sorry.;#94 +albo;either;#96 +ręce;Hands;#96 +te;These;#96 +gerwazy;gervas;#96 +między;between;#100 +są;are;#100 +była;was;#102 +cóż;Well;#102 +będzie;will be;#104 +głowy;Common;#104 +telimena;telimena;#104 +bardzo;Very;#107 +tadeusza;tadeus;#108 +razem;Total;#108 +głowę;Head;#110 +je;eat;#111 +ziemi;soil;#112 +mój;mine;#112 +robak;worm;#112 +ręką;hand;#112 +klucznik;keypad;#112 +ci;Common;#117 +dwa;two;#117 +ty;you;#119 +podkomorzy;sub-chambers;#119 +zamku;castle;#119 +gdyby;if;#119 +nic;Nothing.;#123 +wszystkie;all;#123 +widać;You can see;#125 +rękę;Hand;#125 +siebie;self;#125 +krzyknął;He screamed;#125 +tymczasem;Meanwhile;#129 +wtem;int;#129 +które;which;#129 +rzecz;thing;#129 +ani;neither;#129 +ledwie;Only;#129 +ni;au;#129 +zaraz;Wait.;#136 +jeden;one;#136 +was;you;#136 +zawsze;Always;#139 +…;...;#139 +nich;them;#141 +tego;This;#141 +nigdy;never;#143 +coraz;getting;#143 +długo;long;#145 +chciał;He wanted to;#145 +czas;time;#145 +nikt;Nobody;#145 +zaś;and;#145 +tyle;That's it.;#145 +ksiądz;priest;#145 +panie;Sir;#145 +dotąd;so far;#145 +przecież;It's not like;#145 +wszystkich;all;#155 +pana;Mr;#155 +wielki;big;#155 +mam;Got it.;#155 +ryków;roar;#155 +strony;pages;#155 +sędziego;Judge;#161 +trzeba;need;#161 +właśnie;Yeah.;#161 +gości;guests;#161 +niej;her;#165 +drzwi;door;#165 +wtenczas;then;#165 +niby;As if;#165 +góry;mountains;#165 +szlachty;nobles;#165 +litwie;Lithuania;#171 +stał;constant;#171 +każdy;each;#171 +stąd;From here;#171 +pierwszy;first;#171 +bóg;God;#171 +wkoło;around;#177 +serce;heart;#177 +której;of;#177 +wnet;immediately;#177 +my;us;#177 
+asesor;asessor;#177 +dwóch;two;#177 +tych;These;#184 +która;which;#184 +przerwał;discontinued;#184 +zosia;zosis;#184 +cię;you;#188 +dobrze;Good.;#188 +okiem;eye;#188 +stary;man;#188 +wie;knows;#188 +chociaż;Although;#188 +których;which;#188 +rejent;regint;#188 +koniec;End;#188 +zosi;ris;#188 +nam;us;#198 +głową;head;#198 +hrabiego;Count;#198 +człowiek;man;#201 +świecie;world;#201 +wszak;After all,;#201 +wielkie;big;#204 +tuż;right;#204 +coś;something;#204 +wy;you;#204 +dwie;two;#204 +broń;weapons;#204 +słychać;I can hear you.;#204 +«;«;#204 +widząc;seeing;#212 +mógł;could;#212 +ta;ta;#212 +nieraz;often;#212 +oba;both;#212 +tę;this;#212 +woźny;Janitor;#218 +kształt;shape;#218 +ręku;hands;#218 +im;i;#218 +końcu;End;#218 +pewnie;Sure.;#218 +nagle;suddenly;#218 +wreszcie;Finally;#218 +ów;This;#218 +wiem;I know.;#218 +często;Common;#218 +iż;that;#218 +usta;mouth;#218 +swe;his;#231 +dalej;Come on.;#231 +zamek;lock;#231 +horeszków;Horns;#231 +byłem;I was.;#231 +sobą;me;#231 +znak;character;#231 +czasem;Sometimes;#231 +także;also;#231 +ciebie;you;#231 +swą;his;#231 +zawołał;called;#231 +niż;than;#243 +były;were;#243 +niegdyś;Once;#243 +lat;years;#243 +panów;gentlemen;#243 +prawda;Right.;#243 +głos;voice;#243 +słowo;word;#243 +swej;his;#243 +drzewa;trees;#252 +trzy;three;#252 +twarz;face;#252 +cały;whole;#252 +naprzód;forward;#252 +mówiąc;Saying;#252 +śród;Mid;#252 +zaczął;started;#252 +wiesz;You know;#252 +major;Major;#252 +oczyma;eyes;#262 +pole;field;#262 +aby;au;#262 +swych;his;#262 +ona;She;#262 +jenerał;protein;#262 +sopliców;soplicas;#262 +jacek;jack;#262 +dworze;Court;#270 +widział;He saw;#270 +dosyć;Enough;#270 +jutro;tomorrow;#270 +młodzież;adolescents;#270 +wolna;free;#270 +którą;which;#270 +pomiędzy;between;#270 +pani;Madam;#270 +taki;such;#270 +środku;center;#270 +cała;whole;#270 +całą;whole;#270 +czym;with;#270 +ludzi;people;#270 +jestem;I am;#270 +ostatni;last;#286 +jesteś;You are;#286 +wiele;multiple;#286 +dał;gave;#286 +nieco;a little;#286 
+wzrok;eyesight;#286 +ażeby;to;#286 +chce;I want;#286 +ach;ah;#286 +nogi;legs;#286 +oczu;eyes;#286 +zbyt;too;#286 +drugiej;second;#286 +cicho;Quiet.;#286 +wiatr;wind;#286 +maciej;maj;#286 +czyli;or;#302 +stołu;table;#302 +wojskiego;military;#302 +dworu;court;#302 +miała;had;#302 +całe;whole;#302 +którym;who;#302 +dość;Enough;#302 +kilka;several;#302 +serca;Cardiac;#302 +lata;years;#302 +swym;his;#302 +równie;same;#302 +drogi;roads;#302 +drugi;second;#302 +całej;whole;#317 +dawniej;past;#317 +siedzi;sits;#317 +białe;white;#317 +rąk;hands;#317 +wyszedł;He left;#317 +czuł;feeling;#317 +siedział;He was in prison.;#317 +szlachcic;noble;#317 +nasz;our;#317 +wziął;took;#317 +jeżeli;if;#317 +wpół;half;#317 +soplica;soplica;#317 +razy;times;#317 +panu;Sir;#317 +jedną;one;#317 +drugą;second;#317 +świat;world;#317 +lud;people;#317 +zosię;suspension;#317 +idzie;He's coming.;#317 +protazy;protease;#317 +moja;mine;#340 +tobie;You;#340 +widzi;see;#340 +lubił;He liked;#340 +niebo;sky;#340 +moskali;moscals;#340 +panny;Misses;#340 +mówił;said;#340 +bieży;Tracks;#340 +którego;of;#340 +goście;guests;#340 +należy;au;#340 +swój;own;#340 +czasu;time;#340 +nasze;our;#340 +wielkim;big;#340 +księdza;priest;#340 +jednej;one;#340 +nią;with her.;#340 +niedźwiedź;bear;#340 +tyłu;rear;#340 +strzelcy;shooters;#340 +oto;Here.;#340 +zdrowie;Health;#363 +blisko;close;#363 +wiedział;knew;#363 +myśl;Think;#363 +lasu;forest;#363 +pół;half;#363 +konia;horse;#363 +miejsce;place;#363 +moje;mine;#363 +ziemię;soil;#363 +prawa;rights;#363 +ludzie;people;#363 +innych;other;#363 +być;be;#363 +soplicowie;soplices;#363 +prawo;right;#363 +można;can;#363 +wszakże;But;#363 +dąbrowski;Dabrowski;#363 +długi;long;#363 +sami;Alone;#363 +prawie;almost;#363 +rad;councils;#363 +dawno;long;#386 +obok;next door;#386 +podniósł;raised;#386 +dzieje;history;#386 +słowa;words;#386 +myśli;thoughts;#386 +kobiety;women;#386 +dzieci;children;#386 +oko;eye;#386 +bernardyn;bernardine;#386 +wam;you;#386 +psy;Dogs;#386 +koło;wheel;#386 
+kraju;country;#386 +widziałem;I saw;#386 +miałem;I had;#386 +muszę;I have to;#386 +żeby;To;#386 +bracie;Brother;#386 +wołał;called;#386 +broni;weapons;#386 +maciek;mat;#386 +dom;home;#408 +młody;young;#408 +poznał;met;#408 +stanął;stopped;#408 +mówić;speak;#408 +las;forest;#408 +każe;says;#408 +świata;world;#408 +sędziemu;Judge;#408 +wielka;big;#408 +więcej;more;#408 +rzeczy;things;#408 +różne;different;#408 +dziecię;child;#408 +wielkiej;big;#408 +człek;man;#408 +gadać;Talk;#408 +rozmowę;conversation;#408 +no;no;#408 +drugim;second;#408 +wsi;villages;#408 +spór;dispute;#408 +chwilę;Wait.;#408 +dawał;was;#408 +bracia;brothers;#408 +krwi;blood;#408 +kędy;where;#408 +niebie;sky;#408 +znów;again;#408 +stolnik;stool;#408 +«nie;«no;#408 +wpadł;He came by.;#408 +waść;weight;#408 +rzekła;said;#408 +me;me;#408 +cesarz;Emperor;#408 +chciałem;I wanted to.;#408 +drogę;road;#408 +chcąc;wanting;#408 +dobrzyńscy;Good;#408 +chrzciciel;baptizer;#408 +księga;Book;#449 +życie;Life;#449 +wciąż;Still;#449 +takim;such;#449 +całym;whole;#449 +któż;Who;#449 +jednym;one;#449 +stało;fixed;#449 +rana;wound;#449 +spojrzał;He looked;#449 +czasie;time;#449 +słońce;sun;#449 +niego;him;#449 +skoro;if;#449 +jedna;one;#449 +mają;have;#449 +dzień;day;#449 +lada;roll;#449 +wojna;War;#449 +tył;rear;#449 +palcem;finger;#449 +prosto;straight;#449 +krzycząc;screaming;#449 +młodzi;young;#449 +ust;mouth;#449 +ruszył;He's moving;#449 +pas;belt;#449 +mych;my;#449 +nań;ed;#449 +uszy;ears;#449 +krzyk;scream;#449 +koń;horse;#449 +wszędzie;everywhere;#449 +panowie;Gentlemen;#449 +masz;Here.;#449 +szlachtę;noble;#449 +chwili;moment;#449 +sercu;heart;#449 +rzecze;says;#449 +niechaj;Let's go.;#449 +książę;Prince;#449 +pono;pono;#449 +górę;top;#449 +imię;name;#449 +wasze;Yours;#449 +płut;Loop;#449 +ile;how much;#495 +patrząc;looking;#495 +ramiona;arms;#495 +wkrótce;soon;#495 +starzy;old;#495 +nieba;sky;#495 +szedł;He was walking;#495 +boku;side;#495 +wznosi;raises;#495 +kazał;He said;#495 +miejscu;site;#495 
+miejsca;places;#495 +każdej;each;#495 +jaki;which;#495 +obie;both;#495 +byli;were;#495 +spod;from;#495 +charty;chart;#495 +sama;Alone;#495 +krzyknęli;They screamed;#495 +król;king;#495 +rozkazy;orders;#495 +swoim;his;#495 +woła;calls;#495 +rękami;hands;#495 +swoje;his;#495 +mię;me;#495 +tłum;crowd;#495 +łzami;tears;#495 +tysiąc;thousand;#495 +nimi;with;#495 +konewka;water;#495 +czoło;forehead;#495 +głupi;Stupid.;#495 +maćka;macaw;#495 +powoli;Slowly;#530 +kończył;finished;#530 +piękny;beautiful;#530 +stryj;Uncle;#530 +szyję;neck;#530 +stali;steel;#530 +ciągle;Still;#530 +wiadomo;Common;#530 +zaczęli;started;#530 +stoi;Deal.;#530 +zrobił;did;#530 +moim;mine;#530 +wszystkim;Everyone;#530 +swoich;of their;#530 +wiedzieć;know;#530 +rzucił;quit;#530 +chodził;walking;#530 +długie;long;#530 +takie;such;#530 +szukać;Search;#530 +czekać;wait;#530 +pierś;breast;#530 +pyta;asks;#530 +szła;went;#530 +psów;dogs;#530 +swego;his;#530 +strzelców;shooters;#530 +lepiej;better;#530 +dzisiaj;Today;#530 +czasów;times;#530 +izby;chambers;#530 +telimeny;telimenes;#530 +oprócz;except;#530 +wiwat;cheer;#530 +zostać;stay;#530 +tą;with;#530 +prawą;right;#530 +zwykle;Common;#530 +nóg;legs;#530 +stronie;page;#530 +bić;beat;#530 +hrabię;Count;#530 +«panie;«Lord;#530 +dobrzyński;Good morning;#530 +kropiciel;drip;#530 +jegry;jegras;#530 +pierwsze;first;#576 +progu;threshold;#576 +biała;white;#576 +daleka;far;#576 +wielką;big;#576 +nareszcie;Finally;#576 +stoją;standing;#576 +nowe;new;#576 +piersi;breast;#576 +słońca;sun;#576 +wieczerzy;Supper;#576 +wojny;wars;#576 +okna;windows;#576 +kwestarz;Quaestors;#576 +rozmowy;talks;#576 +szlachtą;a noble;#576 +trzymał;held;#576 +milczał;He was quiet.;#576 +sędzio;Judge;#576 +pamiętam;I remember.;#576 +oni;they;#576 +bernardyna;bernardin;#576 +oj;ow;#576 +duszy;soul;#576 +cztery;four;#576 +stronę;page;#576 +zna;knows;#576 +został;was;#576 +stojąc;standing;#576 +zając;hare;#576 +lewo;left;#576 +stół;table;#576 +Żeby;To;#576 +sto;Common;#576 
+telimenie;telimene;#576 +ucha;Ear;#576 +bierze;takes;#576 +daremnie;In vain;#576 +mopanku;mopanku;#576 +mną;me;#576 +brat;brother;#576 +rapier;rapier;#576 +scyzoryk;pocketknife;#576 +mocno;hard;#576 +mojej;my;#576 +szpady;blades;#576 +skoczył;jumped;#576 +ogród;garden;#576 +słysząc;hearing;#576 +obu;both;#576 +znam;I know;#576 +państwo;country;#576 +mojego;my;#576 +«niech;«Let him;#576 +środek;centre;#576 +jam;jam;#576 +czarne;black;#576 +mej;mej;#576 +niebios;sky;#576 +sztuki;pieces;#576 +jankiel;jenkiel;#576 +rady;Advice;#576 +próżno;vain;#576 +źwierza;animal;#576 +czele;Head;#576 +widzę;I see;#641 +dziecko;child;#641 +ciche;quiet;#641 +dwór;court;#641 +ściany;walls;#641 +zapewne;Probably.;#641 +konie;horses;#641 +dawne;old;#641 +mniej;less;#641 +trzyma;holds;#641 +szukał;He was looking;#641 +wody;water;#641 +stała;constant;#641 +postać;Character;#641 +włos;hair;#641 +domem;home;#641 +kilku;several;#641 +woli;prefer;#641 +jedno;one;#641 +stanie;condition;#641 +sieni;sows;#641 +trudno;difficult;#641 +dobra;good;#641 +resztę;change;#641 +wielu;many;#641 +stołem;Table;#641 +idąc;walking;#641 +kolana;knees;#641 +proszę;Please.;#641 +bym;ed;#641 +powiem;I'll say;#641 +taka;such;#641 +miłość;love;#641 +kim;kim;#641 +stare;old;#641 +prócz;Except;#641 +ręki;hand;#641 +dokoła;around;#641 +całkiem;quite;#641 +mały;small;#641 +jaśnie;Common;#641 +wielmożny;great;#641 +mogę;I can;#641 +wielkich;big;#641 +wieść;news;#641 +dostał;got;#641 +jakieś;some;#641 +robaka;worm;#641 +ujrzał;He saw;#641 +dobry;Good;#641 +starzec;old man;#641 +czyż;isn't it;#641 +znaczy;I mean;#641 +krwią;blood;#641 +krew;blood;#641 +słyszał;He heard;#641 +«ja;«self;#641 +ogrodu;garden;#641 +darmo;free;#641 +puszczy;Forests;#641 +wasz;Yours;#641 +głowie;Head;#641 +będę;I will.;#641 +czeka;waiting;#641 +kocha;Love;#641 +musi;must;#641 +chwyta;captures;#641 +uderzył;hit;#641 +skrzydła;wings;#641 +lewą;left;#641 +klucznika;keypad;#641 +buchman;buchman;#641 +podkomorzego;Subventricular;#713 
+kusego;Tempor;#713 +oka;eye;#713 +ojczyzny;country;#713 +laty;years;#713 +wzdłuż;length;#713 +gęsto;dense;#713 +dziedziniec;courtyard;#713 +polski;Polish;#713 +trzech;three;#713 +pokój;room;#713 +trawy;grass;#713 +daleko;far;#713 +kwiaty;flowers;#713 +okno;window;#713 +dala;away;#713 +żeś;♪ you're ♪;#713 +mamy;We have;#713 +dni;days;#713 +panem;Mr.;#713 +drogą;Road;#713 +porządku;Order;#713 +wieku;age;#713 +prędko;fast;#713 +weszli;They're in.;#713 +swojej;his;#713 +stolicy;Capital;#713 +boga;god;#713 +małą;small;#713 +mego;my;#713 +powiecie;say;#713 +dłużej;longer;#713 +polak;Pole;#713 +napoleon;napoleon;#713 +słuchał;Listen;#713 +sprawy;cases;#713 +przeciw;against;#713 +zwrócił;returned;#713 +słowem;word;#713 +łowy;fishing;#713 +nami;us;#713 +musiał;had;#713 +znalazł;found;#713 +robić;do;#713 +taką;such;#713 +ilekroć;Whenever;#713 +szczęściem;happiness;#713 +umiał;could;#713 +okolicy;area;#713 +gadał;He was talking.;#713 +króla;king;#713 +polowanie;Hunting;#713 +wedle;according to;#713 +prosi;Please;#713 +stały;fixed;#713 +dłoni;hands;#713 +niemało;a lot;#713 +nowy;new;#713 +jaką;which;#713 +patrzy;Look;#713 +długim;long;#713 +sługi;servants;#713 +bok;side;#713 +chcę;I want to;#713 +zgody;consent;#713 +stolnika;stool;#713 +izbie;Chamber;#713 +nocy;night;#713 +kul;balls;#713 +żem;thom;#713 +kręci;spin;#713 +około;approximately;#713 +ostatnie;last;#713 +wiecie;You know;#713 +niestety;Unfortunately;#713 +lasach;forests;#713 +jakże;What's the matter?;#713 +pary;pair;#713 +milczkiem;silence;#713 +lasów;forests;#713 +sławny;famous;#713 +trochę;A little;#713 +zrazu;of;#713 +pilnie;urgent;#713 +hrabio;Count;#713 +dwaj;two;#713 +«i;«and;#713 +ileż;How much;#713 +hasło;password;#713 +ciągnie;pulls;#713 +biegą;run;#713 +gerwazego;gervase;#713 +zosiu;zosis;#713 +mówi;says;#713 +patrz;see;#713 +hrabi;Count;#713 +daj;give;#713 +moskala;Moscow;#713 +com;com;#713 +boju;Fight;#713 +wpada;He's coming in.;#713 +mistrz;Master;#713 +sporu;dispute;#815 +sokoła;Falcon;#815 
+iść;go;#815 +bogu;God;#815 +duszę;soul;#815 +szeroko;Wide;#815 +siedzą;They're sitting;#815 +same;alone;#815 +biegł;He ran;#815 +mieście;city;#815 +miecz;sword;#815 +małe;small;#815 +dano;given;#815 +wcale;Not at all;#815 +mogą;may;#815 +boru;boron;#815 +głębi;depth;#815 +potrzeba;need;#815 +dam;I'll give;#815 +gość;guest;#815 +jakim;which;#815 +tylu;so many;#815 +rodziny;families;#815 +żaden;none;#815 +tem;tem;#815 +proces;process;#815 +poglądał;looked;#815 +stryja;Uncle;#815 +każda;each;#815 +ojca;Father;#815 +droga;road;#815 +zręcznie;skillfully;#815 +kogo;who;#815 +skąd;from;#815 +chcą;They want;#815 +znać;known;#815 +zwłaszcza;especially;#815 +czasy;times;#815 +polskie;Polish;#815 +zza;Common;#815 +wojsku;military;#815 +moskal;moscal;#815 +kapitan;Captain;#815 +nowa;new;#815 +głowa;head;#815 +wstęgi;ribbons;#815 +rzędem;row;#815 +stoła;Table;#815 +rejentem;regient;#815 +cichu;Quiet;#815 +podobne;similar;#815 +dać;give;#815 +patrzył;He looked;#815 +chartów;chart;#815 +…»;...»;#815 +myśliwi;hunters;#815 +sprawę;case;#815 +waszeć;Yours;#815 +znał;He knew;#815 +strzelby;shotguns;#815 +którzy;who;#815 +zły;bad;#815 +dobył;have reached;#815 +rzuca;throw;#815 +szabli;swords;#815 +one;they;#815 +polu;field;#815 +starca;old man;#815 +śmierci;death;#815 +działo;Gun;#815 +dziewczyna;girl;#815 +głęboko;deep;#815 +ptaki;birds;#815 +trzykroć;three times;#815 +gromadę;cluster;#815 +kot;cat;#815 +zgoda;consent;#815 +sali;room;#815 +milczenie;Silence;#815 +płynie;Flow;#815 +soplicę;soplica;#815 +rury;pipe;#815 +powieść;novel;#815 +choćby;Even;#815 +zielonej;green;#815 +myślisz;You think;#815 +różnych;different;#815 +drzew;trees;#815 +ogrodzie;garden;#815 +chowa;hide;#815 +takiej;such;#815 +winy;fault;#815 +śmiał;I dare you.;#815 +powstał;created;#815 +temu;of;#815 +uczuł;feeling;#815 +trzeci;third;#815 +plac;square;#815 +świeci;shines;#815 +sposób;method;#815 +zdaje;a;#815 +kolei;Railway;#815 +patrzcie;Look.;#815 +owe;Common;#815 +Ów;This;#815 +ustach;mouth;#815 
+tudzież;and;#815 +niezmiernie;extremely;#815 +żyje;alive;#815 +«a;«a;#815 +podoba;like;#815 +szczerze;Honestly.;#815 +oczach;eyes;#815 +naszych;Ours;#815 +część;part;#815 +soplicy;soplicas;#815 +nos;nose;#815 +głowami;heads;#815 +miłości;Love;#815 +chyba;I guess.;#815 +twój;Yours;#815 +słuchać;Listen;#815 +muszą;must;#815 +«to;«it;#815 +dobrzyńskich;Good;#815 +prusak;prusak;#815 +kropić;drip;#815 +tobą;You;#815 +jegrów;blackberries;#815 +serwis;service;#815 +zajazd;inn;#945 +drugie;second;#945 +Śród;Mid;#945 +wcześnie;early;#945 +końca;end;#945 +piękne;beautiful;#945 +rejtan;ruitan;#945 +leży;lies;#945 +skłonił;He made;#945 +młoda;young;#945 +dłonie;hands;#945 +biały;white;#945 +głośno;loud;#945 +zaczęła;started;#945 +powieści;novels;#945 +powiadał;He said;#945 +czego;of;#945 +zbiera;collect;#945 +lasem;forest;#945 +idą;Here they come.;#945 +kroku;step;#945 +obyczaje;customs;#945 +urzędu;Office;#945 +mało;low;#945 +poważnie;Seriously.;#945 +kochał;He loved;#945 +pańskie;Your;#945 +pośrodku;mid;#945 +późno;late;#945 +drodze;road;#945 +pacierz;Paper;#945 +wyrzekł;renunciation;#945 +białą;white;#945 +żyć;Live;#945 +inni;others;#945 +przynajmniej;at least;#945 +nogą;leg;#945 +uśmiechem;smile;#945 +wiedzą;know;#945 +żył;Liver;#945 +drugich;other;#945 +mowy;speech;#945 +rozum;reason;#945 +granicę;limit;#945 +daje;give;#945 +nowina;news;#945 +słyszałem;I heard;#945 +warszawy;Warsaw;#945 +przysłowie;proverb;#945 +mówię;I say;#945 +kula;ball;#945 +grzecznie;polite;#945 +asesora;asessor;#945 +złość;Anger;#945 +myślił;thought;#945 +koniu;horse;#945 +ojciec;father;#945 +pierwszej;first;#945 +ogień;fire;#945 +miasta;cities;#945 +inaczej;different;#945 +palce;fingers;#945 +ruch;Move;#945 +gromada;cluster;#945 +wrzasnął;He screamed;#945 +rejenta;race;#945 +dwakroć;twice;#945 +złota;gold;#945 +urząd;office;#945 +wilk;wolf;#945 +rozkaz;order;#945 +czarny;black;#945 +druga;second;#945 +chłopiec;boy;#945 +każdego;each;#945 +życiu;Life;#945 +syna;son;#945 +trawie;grass;#945 
+roli;role;#945 +szczwacze;Staplers;#945 +raczył;was;#945 +zwał;au;#945 +istocie;substance;#945 +szlachcica;noble;#945 +rębajło;cuticle;#945 +bawił;He was playing;#945 +znajdziesz;You'll find;#945 +górze;top;#945 +każdym;each;#945 +ognia;fire;#945 +zrobić;do;#945 +porwał;Kidnapped;#945 +wpadła;She came by.;#945 +scyzorykiem;pocketknife;#945 +jednego;one;#945 +ode;from;#945 +póki;while;#945 +byś;you;#945 +gdzieniegdzie;some;#945 +śmie;dare;#945 +blask;shine;#945 +szmer;murmur;#945 +telimeną;telimena;#945 +zowie;Common;#945 +mówiłem;I told you.;#945 +śmiejąc;laughing;#945 +rada;Council;#945 +pleban;pleban;#945 +wami;You;#945 +pokoju;peace;#945 +dumania;pride;#945 +litewskich;Lithuanian;#945 +dziki;wild;#945 +mą;Common;#945 +chłopca;boy;#945 +bokiem;side;#945 +ptastwo;birds;#945 +dłoń;hand;#945 +wierzchu;top;#945 +poły;half;#945 +szczęście;happiness;#945 +zdało;passed;#945 +gniewu;anger;#945 +kochać;Love;#945 +kraj;country;#945 +boże;God.;#945 +para;pair;#945 +mocy;Strength;#945 +leci;It's coming.;#945 +«tak;«Yes;#945 +niemu;him;#945 +kolej;railway;#945 +kaplicy;chapels;#945 +przyjaciele;friends;#945 +myśląc;thinking;#945 +radości;Joy;#945 +mieli;had;#945 +przodu;front;#945 +Żyd;Jew;#945 +pierwej;First;#945 +kroki;steps;#945 +tymi;These;#945 +muzyka;music;#945 +ramiony;arms;#945 +róg;horn;#945 +zabił;killed;#945 +wznosząc;rising;#945 +gotów;Ready;#945 +rzekłbyś;You'd say;#945 +denassów;Denasses;#945 +przyjaciela;friend;#945 +soplicowa;soplica;#945 +dobrzynie;Good morning.;#945 +łzy;tears;#945 +polska;Polish;#945 +chrzciciela;baptizer;#945 +dalbóg;dalbog;#945 +bądź;be;#945 +sak;sac;#945 +czyny;actions;#945 +rykowa;roar;#945 +litwy;lithium;#1106 +zboże;cereals;#1106 +wszerz;Across;#1106 +mieszka;lives;#1106 +folwarku;farm;#1106 +którymi;with;#1106 +oburącz;both hands;#1106 +mieszkał;lived;#1106 +ochmistrzyni;Commander;#1106 +miły;Nice;#1106 +pełne;full;#1106 +oblicze;face;#1106 +dole;bottom;#1106 +lekka;light;#1106 +krzyknęła;She screamed;#1106 +słudzy;Servants;#1106 
+wyszli;They're out.;#1106 +czekają;waiting;#1106 +przyjaciel;friend;#1106 +krótkie;short;#1106 +nosił;wore;#1106 +dawnego;old;#1106 +hrabią;Count;#1106 +łące;meadow;#1106 +domy;homes;#1106 +kontusza;Other Organiser;#1106 +łąki;meadow;#1106 +chybił;missed;#1106 +tysiące;thousands;#1106 +kroków;steps;#1106 +dziedzic;heir;#1106 +zginął;He's dead.;#1106 +myśliwców;fighters;#1106 +brał;took;#1106 +kłaniał;dic;#1106 +młodzieży;adolescents;#1106 +jedli;They ate;#1106 +słów;words;#1106 +zatem;Therefore,;#1106 +zwyczajem;custom;#1106 +mym;mym;#1106 +komu;Who;#1106 +śmiało;Go ahead.;#1106 +zda;da;#1106 +człowieka;human;#1106 +rodu;family;#1106 +milczeniu;silence;#1106 +«mój;«my;#1106 +młode;young;#1106 +widzieć;See;#1106 +mimo;despite;#1106 +kiedyś;Once;#1106 +nasza;our;#1106 +zawżdy;ever;#1106 +żołnierz;soldier;#1106 +płuta;fin;#1106 +wszedł;He came in.;#1106 +włosy;hair;#1106 +wieś;village;#1106 +rozmów;talks;#1106 +mężczyźni;Male;#1106 +owa;This;#1106 +nazbyt;too much;#1106 +lica;li;#1106 +strzelać;shoot;#1106 +wojska;troops;#1106 +ciągnął;pulling;#1106 +palcami;fingers;#1106 +sokół;falcon;#1106 +pies;dog;#1106 +uchem;ear;#1106 +«prawda;«truth;#1106 +odgłos;sound;#1106 +obławy;manhunts;#1106 +chwały;glory;#1106 +naszej;our;#1106 +łowach;Fowls;#1106 +wstał;He's up.;#1106 +echem;echo;#1106 +piękna;beautiful;#1106 +zwany;called;#1106 +stają;stop;#1106 +kniaziewicz;kniaziewicz;#1106 +odszedł;He's gone;#1106 +wyskoczył;He jumped.;#1106 +przyszedł;came;#1106 +niźli;below;#1106 +ostatniego;last;#1106 +chcesz;You want;#1106 +ozwał;du;#1106 +ranny;injured;#1106 +kręcił;filming;#1106 +dobre;Good;#1106 +zwierza;animal;#1106 +sąsiadów;Neighbors;#1106 +papier;paper;#1106 +dawnych;old;#1106 +odtąd;From now on;#1106 +własnym;own;#1106 +smutnie;Sad;#1106 +skończyć;finish;#1106 +dworem;court;#1106 +włościan;tycoon;#1106 +«w;«w;#1106 +wyraz;word;#1106 +rzadko;Rare;#1106 +karabelę;carbel;#1106 +źle;bad;#1106 +wrzask;Screaming;#1106 +wokoło;around;#1106 +szkoda;damage;#1106 
+konopie;hemp;#1106 +owad;insect;#1106 +pięknie;Beautiful;#1106 +chwila;Wait.;#1106 +zwyczaju;custom;#1106 +leżą;lie down;#1106 +powiada;says;#1106 +wiosną;spring;#1106 +umyślnie;deliberately;#1106 +staje;stops;#1106 +zwierząt;animals;#1106 +ekonom;econom;#1106 +mospanie;mospapine;#1106 +wzroku;eyesight;#1106 +oboje;both;#1106 +strzały;Shots;#1106 +części;parts;#1106 +parę;pair;#1106 +sprzęt;equipment;#1106 +długimi;long;#1106 +wzięła;took;#1106 +gęsi;geese;#1106 +grzybów;fungi;#1106 +chmury;clouds;#1106 +kury;Chicken;#1106 +gwiazd;stars;#1106 +okropnie;It's terrible.;#1106 +wyciągnął;He pulled;#1106 +słowy;words;#1106 +szuka;looking for;#1106 +ruchy;Move;#1106 +dziwny;strange;#1106 +tamten;that;#1106 +traw;grass;#1106 +drugiego;second;#1106 +wiatrem;wind;#1106 +aśćka;wand;#1106 +łaski;grace;#1106 +przykład;example;#1106 +mając;having;#1106 +zaręczyny;engagement;#1106 +szczęścia;happiness;#1106 +pany;gentlemen;#1106 +da;da;#1106 +patrzą;They're looking.;#1106 +będą;will be;#1106 +chmura;cloud;#1106 +hrabiemu;Count;#1106 +obadwa;obadwa;#1106 +nadziei;hope;#1106 +mieć;have;#1106 +bitwę;battle;#1106 +pieśni;songs;#1106 +sen;sleep;#1106 +czole;forehead;#1106 +wyprawy;expeditions;#1106 +żyją;They're alive.;#1106 +księże;priest;#1106 +moskalów;Moscals;#1106 +skórę;Skin;#1106 +lice;high;#1106 +krzyczy;She screams;#1106 +kręcąc;spinning;#1106 +ducha;ghost;#1106 +jedne;one;#1106 +pojedynek;duel;#1106 +zjawił;showed;#1106 +otworzył;opened;#1106 +składa;fold;#1106 +szepnął;He whispered;#1106 +bardziej;more;#1106 +zgodzić;agree;#1106 +radę;council;#1106 +ucztę;feast;#1106 +takt;tact;#1106 +widzisz;See;#1106 +jaka;which;#1106 +piją;drink;#1106 +mocniej;harder;#1106 +słup;pole;#1106 +twe;Your;#1106 +napoleona;napoleona;#1106 +powstanie;birth;#1106 +bliska;close;#1106 +kuchni;kitchen;#1106 +zaścianku;bedding;#1106 +hejże;Heaves;#1106 +brzytewka;Razors;#1106 +gdybym;If;#1106 +szpadę;sword;#1106 +stawy;joints;#1106 +waści;Weight;#1106 +struny;strings;#1106 +rękojeść;handle;#1106 
+szereg;series;#1106 +drążki;Bars;#1106 +twą;Your;#1313 +mogłem;I could;#1313 +rzadka;Rare;#1313 +gaju;grove;#1313 +bronią;weapons;#1313 +zewsząd;everywhere;#1313 +okolica;area;#1313 +gwiazdy;stars;#1313 +ogromne;huge;#1313 +bramę;gate;#1313 +pociągnął;pulled;#1313 +wchodzi;in;#1313 +świeżo;fresh;#1313 +biegu;Running;#1313 +kogoś;someone;#1313 +zaledwie;Only;#1313 +bywa;sometimes;#1313 +blasku;shine;#1313 +twarzy;face;#1313 +obłok;cloud;#1313 +młodzieniec;young man;#1313 +zmieszany;mixed;#1313 +mody;fashion;#1313 +zwykł;normal;#1313 +krewny;relative;#1313 +gościa;guest;#1313 +kryjomu;hidden;#1313 +samym;alone;#1313 +sądy;courts;#1313 +bawić;Fun;#1313 +gospodarza;host;#1313 +spuszcza;flush;#1313 +szły;mouth;#1313 +jakie;which;#1313 +zwyczaj;custom;#1313 +wzgląd;reason;#1313 +łez;tears;#1313 +stoły;Tables;#1313 +dlaczego;Why;#1313 +wprawdzie;Yes.;#1313 +inne;other;#1313 +mury;walls;#1313 +damom;Ladies;#1313 +krótki;short;#1313 +siadł;sat down;#1313 +puste;empty;#1313 +pewny;sure;#1313 +wstydu;shame;#1313 +pogląda;view;#1313 +lubi;likes;#1313 +zmienia;changes;#1313 +wychowanie;Education;#1313 +córki;daughters;#1313 +ludźmi;people;#1313 +światem;World;#1313 +ścisnął;squeezed;#1313 +pamięć;memory;#1313 +drudzy;others;#1313 +grzeczność;politeness;#1313 +męża;husband;#1313 +żony;wives;#1313 +pewna;sure;#1313 +poznać;Meet;#1313 +ojczyźnie;country;#1313 +jakiej;of;#1313 +przyjechał;He's here.;#1313 +francusku;French;#1313 +zamiast;instead of;#1313 +chłopi;peasants;#1313 +francuzi;French;#1313 +gada;talks;#1313 +szło;was going;#1313 +tytuł;title;#1313 +starym;old;#1313 +prosił;asked;#1313 +ciekawy;interesting;#1313 +he;he;#1313 +pułku;regiment;#1313 +bitwie;battle;#1313 +kota;cat;#1313 +ubiór;clothing;#1313 +pośród;among;#1313 +rzęd;row;#1313 +jasnych;clear;#1313 +zaczęły;started;#1313 +zająca;hare;#1313 +podobna;similar;#1313 +później;later;#1313 +myśliłem;I thought;#1313 +będziesz;You'll be;#1313 +tajnie;secret;#1313 +kochanka;Lover;#1313 +dzieciństwa;childhood;#1313 
+wilnie;Vilnius;#1313 +wolę;I prefer;#1313 +majątek;property;#1313 +nowo;New;#1313 +trwogi;anxiety;#1313 +daléj;daléj;#1313 +wytknął;detected;#1313 +kurki;chickens;#1313 +stole;table;#1313 +kusy;muses;#1313 +puścił;He let go;#1313 +zabawy;fun;#1313 +widok;view;#1313 +ogon;tail;#1313 +knieje;I can't.;#1313 +myśliwych;hunters;#1313 +polowania;hunting;#1313 +tabakiery;tabaquers;#1313 +zażył;taken;#1313 +starego;old;#1313 +ogary;hounds;#1313 +moich;mine;#1313 +szczególniej;especially;#1313 +głosem;voice;#1313 +procesu;process;#1313 +złote;gold;#1313 +kamień;stone;#1313 +żołnierzy;soldiers;#1313 +bronić;defend;#1313 +usłyszał;He heard;#1313 +«do;«to;#1313 +pokazał;showed;#1313 +powiedzieć;say;#1313 +sędzią;Judge;#1313 +skroni;temporal;#1313 +wejrzenie;sight;#1313 +pełno;full;#1313 +obcych;foreign;#1313 +pańskich;Your;#1313 +wioski;villages;#1313 +krajach;countries;#1313 +sad;orchard;#1313 +oku;eye;#1313 +wśród;among;#1313 +skacząc;Jumping;#1313 +dziedzińcu;courtyard;#1313 +trąby;trumpets;#1313 +wesoło;Happy;#1313 +kaptur;hood;#1313 +milczenia;silence;#1313 +czarnych;black;#1313 +wschodu;east;#1313 +odbijał;reflected;#1313 +dziwne;Strange;#1313 +spojrzawszy;Looking;#1313 +dwadzieścia;Twenty;#1313 +zdawało;reported;#1313 +bitwy;battles;#1313 +dwoje;two;#1313 +nisko;low;#1313 +sejm;Sejm;#1313 +machnął;wave;#1313 +rozpaczy;despair;#1313 +nieboszczyk;Dead;#1313 +częściej;Common;#1313 +łeb;head;#1313 +poznałem;I've met;#1313 +dym;Smoke;#1313 +żalu;Regret;#1313 +grobie;grave;#1313 +scyzoryku;pocketknife;#1313 +kłótni;quarrel;#1313 +przyszło;came;#1313 +pięknej;beautiful;#1313 +szerokie;wide;#1313 +woń;smell;#1313 +służą;serve;#1313 +słońcem;sun;#1313 +dół;bottom;#1313 +schyla;incline;#1313 +stada;flocks;#1313 +waszeci;Yours.;#1313 +ucho;ear;#1313 +potrawy;dishes;#1313 +polszcze;shell;#1313 +pierwsi;first;#1313 +zwierz;animal;#1313 +stronach;pages;#1313 +dowiedział;found;#1313 +dziedzica;heir;#1313 +lewej;left;#1313 +«jak;«how;#1313 +pochwały;praise;#1313 +chcecie;You 
want;#1313 +plan;plan;#1313 +księcia;Prince;#1313 +wielkiego;big;#1313 +śledztwo;investigation;#1313 +radził;advise;#1313 +zboża;cereals;#1313 +wierz;believe;#1313 +nawzajem;each other;#1313 +uważał;He was careful.;#1313 +ożenił;married;#1313 +kiedym;when;#1313 +radziwiłł;advised;#1313 +głów;Head;#1313 +lekkie;light;#1313 +powietrzu;air;#1313 +wiedząc;Knowing;#1313 +wąż;hose;#1313 +duch;ghost;#1313 +zdziwiony;surprised;#1313 +pośpieszał;He was rushing;#1313 +owej;of;#1313 +dziecka;child;#1313 +duchy;Ghosts;#1313 +tamci;those;#1313 +szeregi;series;#1313 +liku;liku;#1313 +końcem;End;#1313 +wracać;back;#1313 +zgodził;agreed;#1313 +młodu;young;#1313 +jacka;jack;#1313 +będziem;I'll be;#1313 +sługa;servant;#1313 +telimenę;telimena;#1313 +sztukę;piece;#1313 +perły;pearls;#1313 +niczym;nothing;#1313 +uszu;ears;#1313 +plany;plans;#1313 +rzucić;quit;#1313 +pobiegł;He ran;#1313 +celu;purpose;#1313 +plebana;plebane;#1313 +spać;sleep;#1313 +kędyś;where;#1313 +skacze;Jumps;#1313 +brodę;beard;#1313 +krwawe;bloody;#1313 +rozkazał;He ordered;#1313 +miodu;honey;#1313 +skołuba;sluice;#1313 +moi;mine;#1313 +tabaki;tabaki;#1313 +litewskim;Lithuanian;#1313 +wojsko;military;#1313 +gwałtem;rape;#1313 +tyś;you;#1313 +krzyż;cross;#1313 +powiedz;Say;#1313 +moskale;Moscals;#1313 +pułk;regiment;#1313 +«ach;«;#1313 +jużci;Now;#1313 +asan;asan;#1313 +szable;scarves;#1313 +powie;He'll say;#1313 +głosy;votes;#1313 +zęby;teeth;#1313 +słucha;Listen;#1313 +głąb;dumb;#1313 +prawej;right;#1313 +ot;ed;#1313 +łotr;villain;#1313 +sztucznie;artificially;#1313 +wre;wre;#1313 +sławy;fame;#1313 +pamiętacie;Remember;#1313 +ktoś;someone;#1313 +marszałek;Marshal;#1313 +dobywa;day;#1313 +rusza;Move;#1313 +spuścił;He flushed;#1313 +pamięci;memory;#1313 +gniewa;Anger;#1313 +mogła;could;#1313 +biesiadę;Celebrate;#1313 +podobny;similar;#1313 +możem;Maybe.;#1313 +wolne;free;#1313 +padła;It's gone.;#1313 +cofa;undo;#1313 +dobrzyńskiego;Good;#1313 +rząd;government;#1313 +wojnę;War;#1313 +pozew;suit;#1313 
+litwa;lithium;#1313 +wzniósł;raised;#1313 +syn;son;#1313 +kościele;church;#1313 +takiego;such;#1313 +wojennych;war;#1313 +kropidło;cap;#1313 +brzytwa;Razor;#1313 +przemiany;Change;#1313 +mazurka;mazurka;#1313 +pić;drink;#1313 +twoje;Yours;#1313 +szatan;Satan;#1313 +cha;cha;#1313 +majorze;Major;#1313 +zemsty;Revenge;#1313 +wolnych;free;#1313 +mickiewicz;mickiewicz;#1612 +spotkanie;meeting;#1612 +dowie;He'll know.;#1612 +cudem;A miracle.;#1612 +szlachecki;noble;#1612 +dostatek;plenty;#1612 +ganek;porch;#1612 +trawę;grass;#1612 +podróżny;traveler;#1612 +wolności;Freedom;#1612 +posępny;dreary;#1612 +wrogów;enemies;#1612 +radością;Joy;#1612 +komnaty;chambers;#1612 +będąc;being;#1612 +dziesięciu;ten;#1612 +źrenice;pupils;#1612 +ciotka;aunt;#1612 +książki;Books;#1612 +okien;windows;#1612 +pełen;full;#1612 +naczynie;vessel;#1612 +ślad;trace;#1612 +piasku;sand;#1612 +kwiatów;flowers;#1612 +pukle;cloves;#1612 +cicha;quiet;#1612 +miesiąca;months;#1612 +suknie;dresses;#1612 +biegła;running;#1612 +młodzieńca;young;#1612 +zmrużył;He's gone.;#1612 +dziewica;virgin;#1612 +zląkł;I'm afraid.;#1612 +nowych;new;#1612 +koni;horses;#1612 +wieczerzę;Supper;#1612 +daleki;far;#1612 +wyjść;Out;#1612 +rozmowa;conversation;#1612 +tadeuszu;tadeus;#1612 +pamiątkę;souvenir;#1612 +ostatnich;last;#1612 +jakoby;As if;#1612 +stodoły;barn;#1612 +chował;hid;#1612 +domowi;home;#1612 +lecą;Here they come.;#1612 +ramię;shoulder;#1612 +wieczora;evening;#1612 +prędzej;Hurry;#1612 +widne;Visible;#1612 +miny;mines;#1612 +przyczyny;causes;#1612 +kądzieli;curds;#1612 +wziąć;take;#1612 +wilna;wine;#1612 +licznych;multiple;#1612 +rogi;horns;#1612 +kołem;wheel;#1612 +najwyższe;highest;#1612 +wódkę;vodka;#1612 +siedli;seat;#1612 +żwawo;Vibrant;#1612 +dziwna;Strange;#1612 +poznano;known;#1612 +wodę;water;#1612 +ogórki;cucumbers;#1612 +rzuciwszy;Since;#1612 +nowym;new;#1612 +nasi;our;#1612 +wnuki;grandchildren;#1612 +starych;old;#1612 +wojewody;voivodes;#1612 +podkomorzemu;subventricular;#1612 +inna;other;#1612 
+przyjaciół;friends;#1612 +nudzi;Boring;#1612 +długa;long;#1612 +wiek;age;#1612 +złotą;gold;#1612 +moda;fashion;#1612 +przodków;ancestral;#1612 +karę;penalty;#1612 +naród;nation;#1612 +władza;authority;#1612 +natenczas;then;#1612 +francuz;French;#1612 +chwała;glory;#1612 +sława;fame;#1612 +drzewo;tree;#1612 +robaku;worm;#1612 +naszym;Ours;#1612 +odpowiedział;answered;#1612 +podniósłszy;raised;#1612 +ruskie;Russian;#1612 +majora;Major;#1612 +francuza;French;#1612 +gadano;talk;#1612 +szli;They were walking;#1612 +podział;breakdown;#1612 +zmienił;changed;#1612 +lisa;fox;#1612 +osoba;person;#1612 +zabawki;toys;#1612 +włosów;hair;#1612 +szybko;Quick;#1612 +ukłonem;bow;#1612 +chciała;She wanted;#1612 +rzędy;rows;#1612 +ławę;jury;#1612 +kłótnia;quarrel;#1612 +zdania;sentences;#1612 +świadki;Witnesses;#1612 +zdała;passed;#1612 +ubrana;dressed;#1612 +kolor;Colour;#1612 +zachodzie;west;#1612 +znikła;gone;#1612 +ogrodniczka;gardening;#1612 +odmienił;changed;#1612 +wzrokiem;eyesight;#1612 +godziny;hours;#1612 +osoby;persons;#1612 +ramion;arms;#1612 +poszli;They're gone.;#1612 +smyk;smoky;#1612 +palec;finger;#1612 +tadeuszowi;Tadeus;#1612 +postawy;attitude;#1612 +języku;language;#1612 +zresztą;Anyway,;#1612 +sąd;court;#1612 +niedawno;recent;#1612 +niespodzianie;surprise;#1612 +spadał;falling;#1612 +kichnął;sneezed;#1612 +«wiwat;« wiwat;#1612 +zadzwonił;He called;#1612 +sąsiedzie;neighbor;#1612 +powiedział;He said;#1612 +dumał;proud;#1612 +«o;«o;#1612 +raczy;flies;#1612 +jechać;drive;#1612 +gośćmi;guests;#1612 +myślą;think;#1612 +woźnego;Janitor;#1612 +damy;Ladies;#1612 +gospodarstwa;farms;#1612 +gęste;dense;#1612 +czarną;black;#1612 +«cóż;«well;#1612 +złego;bad;#1612 +spał;Sleep;#1612 +kieszeni;pocket;#1612 +służy;service;#1612 +ręka;hand;#1612 +kończąc;ending;#1612 +reszta;rest;#1612 +tysiącem;thousands;#1612 +orły;Eagles;#1612 +puszcz;Let go;#1612 +brzegów;edges;#1612 +rodzina;family;#1612 +siadał;sat down;#1612 +stara;old;#1612 +francuzów;French;#1612 +wodza;Commander;#1612 
+cichy;silent;#1612 +skóry;Skin;#1612 +ludu;people;#1612 +chłopcy;boys;#1612 +otwierał;open;#1612 +nocą;night;#1612 +wiosny;spring;#1612 +zrobiła;did;#1612 +ponad;over;#1612 +krokiem;step;#1612 +pacierze;beads;#1612 +kiwnął;Nod;#1612 +środkiem;centre;#1612 +białych;white;#1612 +zatrzymał;Stop;#1612 +ujrzy;He'll see;#1612 +schyliwszy;more;#1612 +pęk;crack;#1612 +mogąc;may;#1612 +czapkę;cap;#1612 +soplicom;soplicom;#1612 +poglądając;watching;#1612 +robi;does;#1612 +prawił;♪ He was right ♪;#1612 +dniu;day;#1612 +ścianie;wall;#1612 +wstrzymał;held;#1612 +innej;other;#1612 +córkę;daughter;#1612 +piękną;beautiful;#1612 +owoż;sheep;#1612 +trzeciego;third;#1612 +moskwa;Moscow;#1612 +wali;wakes;#1612 +czynnie;Active;#1612 +dołu;Bottom;#1612 +wesół;wedding;#1612 +kulę;ball;#1612 +cel;purpose;#1612 +dar;gift;#1612 +rodzinie;family;#1612 +sławie;fame;#1612 +układy;System;#1612 +jarzyny;vegetables;#1612 +liście;leaves;#1612 +liściem;leaf;#1612 +różowe;pink;#1612 +zbudził;awakened;#1612 +poprawił;improved;#1612 +woda;water;#1612 +grona;clusters;#1612 +okulary;glasses;#1612 +wielcy;big;#1612 +wczora;Yesterday;#1612 +pola;Fields;#1612 +cara;cara;#1612 +kocham;Love;#1612 +otóż;and;#1612 +gdym;when;#1612 +grzech;sin;#1612 +wieczór;evening;#1612 +winem;wine;#1612 +dziw;Strange;#1612 +radzi;advises;#1612 +kwita;even;#1612 +tłumie;crowd;#1612 +much;fly;#1612 +rzuciła;She quit;#1612 +hałasu;noise;#1612 +krzyczał;He screamed;#1612 +ochoty;desire;#1612 +jesteście;Are you;#1612 +waszej;Yours;#1612 +znałem;I knew;#1612 +spotkać;meet;#1612 +innego;other;#1612 +pasie;belt;#1612 +dzwon;bell;#1612 +sadu;orchard;#1612 +pochyliwszy;slanting;#1612 +łacniej;softer;#1612 +gołębie;pigeons;#1612 +wroga;enemy;#1612 +barwy;Colours;#1612 +wiatru;wind;#1612 +dziewczyny;girls;#1612 +skrzydło;wing;#1612 +«czy;«;#1612 +świeże;fresh;#1612 +ziela;herbs;#1612 +rycerze;knights;#1612 +sercem;heart;#1612 +obliczem;face;#1612 +słuch;hearing;#1612 +dobrodziéj;benevolence;#1612 +tony;tonnes;#1612 +powodu;reason;#1612 
+dusza;soul;#1612 +ziemia;land;#1612 +kapelusz;hat;#1612 +parkan;parkan;#1612 +ogony;tails;#1612 +mówią;They say;#1612 +wzięli;took;#1612 +pojął;understood;#1612 +pieśń;song;#1612 +spoziera;spacer;#1612 +strumień;stream;#1612 +księgi;books;#1612 +błyszcząca;shiny;#1612 +chustką;with a handkerchief;#1612 +zmieszał;mixed;#1612 +los;fate;#1612 +«jeśli;«if;#1612 +brata;brother;#1612 +gniewem;anger;#1612 +karczmie;inns;#1612 +żal;Regret;#1612 +długą;long;#1612 +gałki;knobs;#1612 +wydobył;extracted;#1612 +krzyku;screaming;#1612 +schował;Hide;#1612 +każde;each;#1612 +czeladź;Chile;#1612 +walce;Fights;#1612 +uciekł;He escaped.;#1612 +służył;service;#1612 +puszczę;I'll let go;#1612 +tłumnie;crowd;#1612 +«na;«on;#1612 +dodał;added;#1612 +gruby;fat;#1612 +karczma;inn;#1612 +bigos;bigos;#1612 +leżał;He was lying down.;#1612 +szczęśliwy;Happy;#1612 +rok;year;#1612 +ubrany;dressed;#1612 +owym;Common;#1612 +oknem;window;#1612 +zerwał;He broke up.;#1612 +czekał;He's been waiting;#1612 +karczmy;inns;#1612 +ozdoby;ornaments;#1612 +nikomu;Nobody;#1612 +skargi;complaints;#1612 +obrony;Defence;#1612 +litwini;lithvini;#1612 +księdzu;priest;#1612 +szlachcie;nobles;#1612 +pisać;write;#1612 +oczyścić;clean;#1612 +trafił;hit;#1612 +drzwiami;door;#1612 +samego;alone;#1612 +dna;bottom;#1612 +wpadły;They came by.;#1612 +gra;game;#1612 +życia;Life;#1612 +rwą;Common;#1612 +niesie;Carry;#1612 +niosą;carry;#1612 +czoła;forehead;#1612 +rurę;pipe;#1612 +pomoc;aid;#1612 +chwalił;Praised;#1612 +szukano;search;#1612 +powietrze;air;#1612 +wóz;wagon;#1612 +jechał;He was driving.;#1612 +staruszek;Old man;#1612 +czekając;Waiting;#1612 +wąs;mustache;#1612 +szabel;saber;#1612 +śmierć;death;#1612 +goni;He's chasing.;#1612 +poczciwy;Good;#1612 +stroju;costume;#1612 +ptastwem;birds;#1612 +niemy;mute;#1612 +nogach;legs;#1612 +zdrów;healthy;#1612 +uciekać;Run;#1612 +kiwając;nodding;#1612 +wino;wine;#1612 +pochylił;slid;#1612 +ponury;Dark;#1612 +polskę;Polish;#1612 +wstyd;Shame;#1612 +tycze;Subject;#1612 
+tadeuszku;Tadee;#1612 +pierwszą;first;#1612 +oddać;return;#1612 +jedni;some;#1612 +«bracie;«brother;#1612 +puszczyk;Cane;#1612 +«co;«what;#1612 +zrobiłem;I did.;#1612 +stryju;Uncle;#1612 +kiwał;kival;#1612 +mierzy;measures;#1612 +dostać;get;#1612 +skończył;Finished;#1612 +biesiady;celebrations;#1612 +bydło;bovine;#1612 +ciągną;pull;#1612 +zostawił;left;#1612 +wolność;freedom;#1612 +szablę;saber;#1612 +wzruszony;moved;#1612 +car;car;#1612 +prosić;please;#1612 +koroną;crown;#1612 +jeść;Eat;#1612 +dobrzyna;Good;#1612 +macieja;matja;#1612 +zwan;ed;#1612 +rokiem;year;#1612 +biega;running;#1612 +chaty;huts;#1612 +macieju;maju;#1612 +krzyczą;They're screaming.;#1612 +jazdy;driving;#1612 +powtórzył;repeated;#1612 +władzy;power;#1612 +«hejże;«hey;#1612 +soplicowo;soplic;#1612 +kraje;countries;#1612 +zachód;west;#1612 +strzał;shot;#1612 +włożył;put;#1612 +twoim;Yours;#1612 +jadę;I'm going.;#1612 +otwarcie;opening;#1612 +czyś;are;#1612 +rozpacz;despair;#1612 +jedź;Drive;#1612 +bitwa;Battle;#1612 +twojej;your;#1612 +karabin;rifle;#1612 +wodzów;Chiefs;#1612 +dobrzy;Good;#1612 +złotem;gold;#1612 +publico;publico;#1612 +biedna;Poor;#1612 +jacku;jacku;#1612 +nosić;wear;#1612 +stany;states;#1612 +wieśniaczki;villagers;#1612 +jenerale;jenerale;#1612 +pl;en;#1612 +lektury;reading;#1612 \ No newline at end of file diff --git a/python_pkg/word_frequency/tests/test_anki_generator.py b/python_pkg/word_frequency/tests/test_anki_generator.py index ab9d36e..eb28267 100644 --- a/python_pkg/word_frequency/tests/test_anki_generator.py +++ b/python_pkg/word_frequency/tests/test_anki_generator.py @@ -13,7 +13,6 @@ try: find_word_contexts, generate_anki_deck, generate_flashcards, - get_top_n_words, main, parse_vocabulary_curve_output, ) @@ -24,7 +23,6 @@ except ImportError: find_word_contexts, generate_anki_deck, generate_flashcards, - get_top_n_words, main, parse_vocabulary_curve_output, ) @@ -80,30 +78,44 @@ class TestParseVocabularyCurveOutput: def test_parse_length_1(self, 
sample_vocabulary_output: str) -> None: """Test parsing output for length 1.""" - excerpt, words = parse_vocabulary_curve_output(sample_vocabulary_output, 1) + excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(sample_vocabulary_output, 1) assert excerpt == "the" - assert words == [("the", 1)] + assert excerpt_words == [("the", 1)] def test_parse_length_2(self, sample_vocabulary_output: str) -> None: """Test parsing output for length 2.""" - excerpt, words = parse_vocabulary_curve_output(sample_vocabulary_output, 2) + excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(sample_vocabulary_output, 2) assert excerpt == "the dog" - assert words == [("the", 1), ("dog", 2)] + assert excerpt_words == [("the", 1), ("dog", 2)] def test_parse_length_3(self, sample_vocabulary_output: str) -> None: """Test parsing output for length 3.""" - excerpt, words = parse_vocabulary_curve_output(sample_vocabulary_output, 3) + excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(sample_vocabulary_output, 3) assert excerpt == "the quick fox" - assert len(words) == 3 - assert ("the", 1) in words - assert ("quick", 3) in words - assert ("fox", 5) in words + assert len(excerpt_words) == 3 + assert ("the", 1) in excerpt_words + assert ("quick", 3) in excerpt_words + assert ("fox", 5) in excerpt_words def test_parse_nonexistent_length(self, sample_vocabulary_output: str) -> None: """Test parsing output for non-existent length.""" - excerpt, words = parse_vocabulary_curve_output(sample_vocabulary_output, 100) + excerpt, excerpt_words, all_vocab = parse_vocabulary_curve_output(sample_vocabulary_output, 100) assert excerpt == "" - assert words == [] + assert excerpt_words == [] + + def test_parse_vocab_dump(self) -> None: + """Test parsing VOCAB_DUMP section.""" + output = """[Length 2] Vocab needed: 2 + Excerpt: "hello world" + Words: hello(#1), world(#2) + +VOCAB_DUMP_START +hello;1 +world;2 +VOCAB_DUMP_END +""" + excerpt, excerpt_words, all_vocab = 
parse_vocabulary_curve_output(output, 2) + assert all_vocab == [("hello", 1), ("world", 2)] # Tests for find_word_contexts @@ -250,31 +262,6 @@ class TestGenerateAnkiDeck: assert "world" in result -# Tests for get_top_n_words - - -class TestGetTopNWords: - """Tests for getting top N words.""" - - def test_get_top_5_words(self) -> None: - """Test getting top 5 words from text.""" - text = "the cat sat on the mat the cat meowed" - words = get_top_n_words(text, 5) - assert len(words) == 5 - # 'the' appears 3x, 'cat' appears 2x - assert words[0][0] == "the" - assert words[0][1] == 1 - assert words[1][0] == "cat" - assert words[1][1] == 2 - - def test_ranks_are_sequential(self) -> None: - """Test that ranks are 1-based and sequential.""" - text = "one two three four five six seven eight" - words = get_top_n_words(text, 8) - ranks = [r for _, r in words] - assert ranks == [1, 2, 3, 4, 5, 6, 7, 8] - - # Tests for main function diff --git a/python_pkg/word_frequency/tests/test_learning_pipe.py b/python_pkg/word_frequency/tests/test_learning_pipe.py index 28e7245..07b5709 100644 --- a/python_pkg/word_frequency/tests/test_learning_pipe.py +++ b/python_pkg/word_frequency/tests/test_learning_pipe.py @@ -4,6 +4,8 @@ from __future__ import annotations import time from pathlib import Path +from typing import TYPE_CHECKING +from unittest.mock import MagicMock, patch import pytest @@ -13,6 +15,40 @@ from python_pkg.word_frequency.learning_pipe import ( load_stopwords, main, ) +import python_pkg.word_frequency.learning_pipe as learning_pipe_module +from python_pkg.word_frequency.translator import TranslationResult + +if TYPE_CHECKING: + from collections.abc import Generator + + +@pytest.fixture +def mock_translation() -> Generator[MagicMock, None, None]: + """Mock translation to avoid requiring argostranslate.""" + def fake_batch_translate( + words: list[str], + from_lang: str, + to_lang: str, + *, + use_cache: bool = True, # noqa: ARG001 + ) -> list[TranslationResult]: + """Fake 
batch translation that returns word with prefix.""" + return [ + TranslationResult( + source_word=word, + translated_word=f"translated_{word}", + source_lang=from_lang, + target_lang=to_lang, + success=True, + ) + for word in words + ] + + # Need to patch in learning_pipe module since it imports the function directly + with patch.object( + learning_pipe_module, "translate_words_batch", side_effect=fake_batch_translate + ): + yield class TestLoadStopwords: @@ -162,7 +198,9 @@ class TestGenerateLearningLesson: class TestMain: """Tests for main CLI function.""" - def test_basic_text_input(self, capsys: pytest.CaptureFixture[str]) -> None: + def test_basic_text_input( + self, capsys: pytest.CaptureFixture[str], mock_translation: None + ) -> None: """Test with text input.""" exit_code = main( [ @@ -179,7 +217,7 @@ class TestMain: assert "LANGUAGE LEARNING LESSON" in captured.out def test_file_input( - self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + self, tmp_path: Path, capsys: pytest.CaptureFixture[str], mock_translation: None ) -> None: """Test with file input.""" test_file = tmp_path / "test.txt" @@ -199,7 +237,7 @@ class TestMain: assert exit_code == 0 assert "hello" in captured.out.lower() - def test_output_to_file(self, tmp_path: Path) -> None: + def test_output_to_file(self, tmp_path: Path, mock_translation: None) -> None: """Test outputting to file.""" output_file = tmp_path / "lesson.txt" @@ -219,7 +257,7 @@ class TestMain: assert "LANGUAGE LEARNING LESSON" in content def test_custom_stopwords( - self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + self, tmp_path: Path, capsys: pytest.CaptureFixture[str], mock_translation: None ) -> None: """Test with custom stopwords file.""" stopwords_file = tmp_path / "stop.txt" @@ -242,7 +280,7 @@ class TestMain: # "hello" should be filtered by custom stopwords def test_multiple_batches_option( - self, capsys: pytest.CaptureFixture[str] + self, capsys: pytest.CaptureFixture[str], mock_translation: None ) -> 
None: """Test --batches option.""" text = " ".join(f"word{i}" * (50 - i) for i in range(30)) @@ -329,10 +367,10 @@ class TestTranslationIntegration: # Should not have translation arrows assert " -> " not in result or "Translation" not in result - def test_lesson_with_translation_params(self) -> None: + def test_lesson_with_translation_params(self, mock_translation: None) -> None: """Test that translation params are accepted.""" text = "hello world hello world hello" - # This should not crash even without argostranslate installed + # This should work with mocked translation result = generate_learning_lesson( text, batch_size=5, @@ -346,12 +384,14 @@ class TestTranslationIntegration: assert "VOCABULARY TO LEARN:" in result assert "hello" in result - def test_main_with_translate_flags(self, tmp_path: Path) -> None: + def test_main_with_translate_flags( + self, tmp_path: Path, mock_translation: None + ) -> None: """Test that main accepts translation flags.""" text_file = tmp_path / "test.txt" text_file.write_text("hello world hello world hello", encoding="utf-8") - # Should not crash even if translation fails + # Should work with mocked translation result = main([ "--file", str(text_file), "--translate-from", "en", @@ -361,7 +401,9 @@ class TestTranslationIntegration: assert result == 0 - def test_translate_to_defaults_to_english(self, capsys: pytest.CaptureFixture[str]) -> None: + def test_translate_to_defaults_to_english( + self, capsys: pytest.CaptureFixture[str], mock_translation: None + ) -> None: """Test that translate_to defaults to 'en' when using auto-detection.""" text = "hello world" # When using --translate flag (translate_from="auto"), translate_to defaults to "en" diff --git a/python_pkg/word_frequency/tests/test_translator.py b/python_pkg/word_frequency/tests/test_translator.py index 2e80320..8e29a62 100644 --- a/python_pkg/word_frequency/tests/test_translator.py +++ b/python_pkg/word_frequency/tests/test_translator.py @@ -47,15 +47,22 @@ except 
ImportError: # Helper context manager for mocking argostranslate class ArgosAvailableMock: - """Context manager to mock argostranslate being available.""" + """Context manager to mock argostranslate being available and control its output. + + Works whether argos is installed or not by patching sys.modules. + """ def __init__(self, translate_returns: str | list[str] | Exception | None = None) -> None: """Initialize with return values for translate().""" self.translate_returns = translate_returns + self.mock_translate_fn = MagicMock() self.mock_translate_module = MagicMock() self.mock_package_module = MagicMock() self.mock_parent = MagicMock() self.original_available = translator._argos_available + self._sys_modules_patcher: MagicMock | None = None + self._ensure_patcher: MagicMock | None = None + self._lang_patcher: MagicMock | None = None def __enter__(self) -> MagicMock: """Set up the mocks.""" @@ -63,36 +70,52 @@ class ArgosAvailableMock: # Set up translate return value if isinstance(self.translate_returns, Exception): - self.mock_translate_module.translate.side_effect = self.translate_returns + self.mock_translate_fn.side_effect = self.translate_returns elif isinstance(self.translate_returns, list): - self.mock_translate_module.translate.side_effect = self.translate_returns + self.mock_translate_fn.side_effect = self.translate_returns elif self.translate_returns is not None: - self.mock_translate_module.translate.return_value = self.translate_returns + self.mock_translate_fn.return_value = self.translate_returns - # Link parent module to submodules (critical for Python imports) + # Wire up the mock modules + self.mock_translate_module.translate = self.mock_translate_fn + self.mock_translate_module.get_installed_languages = MagicMock(return_value=[]) + self.mock_package_module.update_package_index = MagicMock() + self.mock_package_module.get_available_packages = MagicMock(return_value=[]) self.mock_parent.translate = self.mock_translate_module 
self.mock_parent.package = self.mock_package_module - # Patch sys.modules - self.patchers = [ - patch.dict( - "sys.modules", - { - "argostranslate": self.mock_parent, - "argostranslate.translate": self.mock_translate_module, - "argostranslate.package": self.mock_package_module, - }, - ), - ] - for p in self.patchers: - p.start() + # Patch sys.modules to inject our mock (works even if argos not installed) + self._sys_modules_patcher = patch.dict( + "sys.modules", + { + "argostranslate": self.mock_parent, + "argostranslate.translate": self.mock_translate_module, + "argostranslate.package": self.mock_package_module, + }, + ) - return self.mock_translate_module + # Patch _ensure_argos_installed and _ensure_language_pair to no-op + self._ensure_patcher = patch.object( + translator, "_ensure_argos_installed", lambda: None + ) + self._lang_patcher = patch.object( + translator, "_ensure_language_pair", lambda f, t: None + ) + + self._sys_modules_patcher.start() + self._ensure_patcher.start() + self._lang_patcher.start() + + return self.mock_translate_fn def __exit__(self, *args: object) -> None: """Restore original state.""" - for p in self.patchers: - p.stop() + if self._lang_patcher: + self._lang_patcher.stop() + if self._ensure_patcher: + self._ensure_patcher.stop() + if self._sys_modules_patcher: + self._sys_modules_patcher.stop() translator._argos_available = self.original_available @@ -101,25 +124,13 @@ class ArgosAvailableMock: @pytest.fixture def mock_argos_unavailable() -> Generator[None, None, None]: - """Mock argostranslate being unavailable.""" + """Mock argostranslate being unavailable (for legacy tests).""" original_value = translator._argos_available translator._argos_available = False yield translator._argos_available = original_value -@pytest.fixture -def mock_all_translators_unavailable() -> Generator[None, None, None]: - """Mock both argostranslate and deep-translator being unavailable.""" - original_argos = translator._argos_available - original_deep = 
translator._deep_translator_available - translator._argos_available = False - translator._deep_translator_available = False - yield - translator._argos_available = original_argos - translator._deep_translator_available = original_deep - - @pytest.fixture def temp_words_file(tmp_path: Path) -> Path: """Create a temporary file with words.""" @@ -174,43 +185,36 @@ class TestTranslationResult: class TestTranslateWord: - """Tests for translate_word function.""" + """Tests for translate_word function - offline-first behavior.""" - def test_translate_word_all_backends_unavailable( - self, mock_all_translators_unavailable: None - ) -> None: - """Test translation when no backends are available.""" - result = translate_word("hello", "en", "es") - assert result.success is False - assert "No translation backend" in str(result.error) - - def test_translate_word_argos_unavailable_uses_deep_translator( - self, mock_argos_unavailable: None - ) -> None: - """Test that deep-translator is used when argos is unavailable.""" - # deep-translator should work as fallback (it's installed) - result = translate_word("hello", "en", "es") - # This may succeed if deep-translator is installed - # Just verify we get a result without crashing - assert isinstance(result, TranslationResult) + def test_translate_word_argos_unavailable_raises(self) -> None: + """Test that translation raises ImportError when argos is unavailable.""" + # Mock _ensure_argos_installed to raise ImportError + with patch.object( + translator, + "_ensure_argos_installed", + side_effect=ImportError("argostranslate not available"), + ): + with pytest.raises(ImportError, match="argostranslate not available"): + translate_word("hello", "en", "es", use_cache=False) def test_translate_word_success(self) -> None: """Test successful word translation.""" with ArgosAvailableMock("hola"): - result = translate_word("hello", "en", "es") + result = translate_word("hello", "en", "es", use_cache=False) assert result.source_word == "hello" 
assert result.translated_word == "hola" assert result.success is True - def test_translate_word_argos_exception_falls_back( - self, mock_argos_unavailable: None - ) -> None: - """Test that argos exception falls back to deep-translator.""" - # With argos unavailable, deep-translator should be used - result = translate_word("hello", "en", "es") - # Just verify it doesn't crash - may succeed or fail depending on network - assert isinstance(result, TranslationResult) + def test_translate_word_argos_exception_returns_error(self) -> None: + """Test that argos exception returns failed result with error.""" + # Mock argos being available but translate raising an exception + with ArgosAvailableMock(RuntimeError("Translation failed")): + result = translate_word("hello", "en", "es", use_cache=False) + + assert result.success is False + assert "Translation failed" in str(result.error) # translate_words tests @@ -221,99 +225,123 @@ class TestTranslateWords: def test_translate_empty_list(self) -> None: """Test translating empty list.""" + # Empty list returns empty result without calling translation results = translate_words([], "en", "es") assert results == [] def test_translate_multiple_words(self) -> None: """Test translating multiple words.""" - with ArgosAvailableMock(["hola", "mundo"]): - results = translate_words(["hello", "world"], "en", "es") + with ArgosAvailableMock(["hola", "mundo"]) as mock: + mock.side_effect = ["hola", "mundo"] + results = translate_words(["hello", "world"], "en", "es", use_cache=False) assert len(results) == 2 assert results[0].translated_word == "hola" assert results[1].translated_word == "mundo" + def test_translate_words_argos_unavailable_raises(self) -> None: + """Test that translating words raises ImportError when argos unavailable.""" + with patch.object( + translator, + "_ensure_argos_installed", + side_effect=ImportError("argostranslate not available"), + ): + with pytest.raises(ImportError, match="argostranslate not available"): + 
translate_words(["hello", "world"], "en", "es", use_cache=False) + # translate_words_batch tests class TestTranslateWordsBatch: - """Tests for translate_words_batch function.""" + """Tests for translate_words_batch function - offline-first.""" def test_batch_empty_list(self) -> None: """Test batch translation of empty list.""" - results = translate_words_batch([], "en", "es") + # Empty list doesn't require argos + with patch.object(translator, "_ensure_argos_installed", lambda: None): + results = translate_words_batch([], "en", "es") assert results == [] def test_batch_small_list(self) -> None: - """Test batch translation of small list (3 or fewer).""" - with ArgosAvailableMock(["uno", "dos", "tres"]) as mock: - results = translate_words_batch(["one", "two", "three"], "en", "es") + """Test batch translation of small list (uses batch mode anyway).""" + with ArgosAvailableMock("uno\ndos\ntres") as mock: + results = translate_words_batch( + ["one", "two", "three"], "en", "es", use_cache=False + ) assert len(results) == 3 - # Small lists use individual translation - assert mock.translate.call_count == 3 + # Batch translation + assert mock.call_count == 1 def test_batch_large_list_success(self) -> None: """Test batch translation of large list.""" words = ["one", "two", "three", "four", "five"] with ArgosAvailableMock("uno\ndos\ntres\ncuatro\ncinco") as mock: - results = translate_words_batch(words, "en", "es") + results = translate_words_batch(words, "en", "es", use_cache=False) assert len(results) == 5 # Batch translation called once - mock.translate.assert_called_once() + mock.assert_called_once() assert results[0].translated_word == "uno" assert results[4].translated_word == "cinco" def test_batch_fallback_on_mismatch(self) -> None: - """Test batch translation falls back when result count mismatches.""" + """Test batch translation falls back to individual when result count mismatches.""" words = ["one", "two", "three", "four"] # First call (batch) returns wrong 
count, subsequent calls are individual with ArgosAvailableMock( - ["wrong\ncount", "uno", "dos", "tres", "cuatro"] + ["wrong", "uno", "dos", "tres", "cuatro"] ) as mock: - results = translate_words_batch(words, "en", "es") + results = translate_words_batch(words, "en", "es", use_cache=False) assert len(results) == 4 - # Fallback to individual - assert mock.translate.call_count == 5 + # Fallback to individual argos translation + assert mock.call_count == 5 def test_batch_fallback_on_exception(self) -> None: - """Test batch translation falls back on exception.""" + """Test batch translation raises on exception (no fallback to online).""" words = ["one", "two", "three", "four"] - # Create mock that raises first then succeeds - original = translator._argos_available - translator._argos_available = True - + # Create mock that raises + mock_translate = MagicMock(side_effect=RuntimeError("Batch failed")) mock_translate_module = MagicMock() - mock_translate_module.translate.side_effect = [ - RuntimeError("Batch failed"), - "uno", - "dos", - "tres", - "cuatro", - ] + mock_translate_module.translate = mock_translate mock_package_module = MagicMock() mock_parent = MagicMock() mock_parent.translate = mock_translate_module mock_parent.package = mock_package_module - with patch.dict( - "sys.modules", - { - "argostranslate": mock_parent, - "argostranslate.translate": mock_translate_module, - "argostranslate.package": mock_package_module, - }, + original = translator._argos_available + translator._argos_available = True + + with ( + patch.dict( + "sys.modules", + { + "argostranslate": mock_parent, + "argostranslate.translate": mock_translate_module, + "argostranslate.package": mock_package_module, + }, + ), + patch.object(translator, "_ensure_argos_installed", lambda: None), + patch.object(translator, "_ensure_language_pair", lambda f, t: None), + pytest.raises(RuntimeError, match="Translation failed"), ): - results = translate_words_batch(words, "en", "es") + 
translate_words_batch(words, "en", "es", use_cache=False) translator._argos_available = original - assert len(results) == 4 + def test_batch_argos_unavailable_raises(self) -> None: + """Test that batch translation raises ImportError when argos unavailable.""" + with patch.object( + translator, + "_ensure_argos_installed", + side_effect=ImportError("argostranslate not available"), + ): + with pytest.raises(ImportError, match="argostranslate not available"): + translate_words_batch(["hello", "world"], "en", "es", use_cache=False) # format_translations tests @@ -394,10 +422,31 @@ class TestGetInstalledLanguages: mock_lang2.code = "es" mock_lang2.name = "Spanish" - with ArgosAvailableMock() as mock: - mock.get_installed_languages.return_value = [mock_lang1, mock_lang2] + # We need to mock the translate module's get_installed_languages + mock_translate_module = MagicMock() + mock_translate_module.get_installed_languages.return_value = [ + mock_lang1, mock_lang2 + ] + mock_package_module = MagicMock() + mock_parent = MagicMock() + mock_parent.translate = mock_translate_module + mock_parent.package = mock_package_module + + original = translator._argos_available + translator._argos_available = True + + with patch.dict( + "sys.modules", + { + "argostranslate": mock_parent, + "argostranslate.translate": mock_translate_module, + "argostranslate.package": mock_package_module, + }, + ): result = get_installed_languages() + translator._argos_available = original + assert ("en", "English") in result assert ("es", "Spanish") in result @@ -462,10 +511,28 @@ class TestMain: self, capsys: pytest.CaptureFixture[str] ) -> None: """Test listing languages when none installed.""" - with ArgosAvailableMock() as mock: - mock.get_installed_languages.return_value = [] + mock_translate_module = MagicMock() + mock_translate_module.get_installed_languages.return_value = [] + mock_package_module = MagicMock() + mock_parent = MagicMock() + mock_parent.translate = mock_translate_module + 
mock_parent.package = mock_package_module + + original = translator._argos_available + translator._argos_available = True + + with patch.dict( + "sys.modules", + { + "argostranslate": mock_parent, + "argostranslate.translate": mock_translate_module, + "argostranslate.package": mock_package_module, + }, + ): result = main(["--list-languages"]) + translator._argos_available = original + assert result == 0 captured = capsys.readouterr() assert "No languages installed" in captured.out @@ -478,10 +545,28 @@ class TestMain: mock_lang.code = "en" mock_lang.name = "English" - with ArgosAvailableMock() as mock: - mock.get_installed_languages.return_value = [mock_lang] + mock_translate_module = MagicMock() + mock_translate_module.get_installed_languages.return_value = [mock_lang] + mock_package_module = MagicMock() + mock_parent = MagicMock() + mock_parent.translate = mock_translate_module + mock_parent.package = mock_package_module + + original = translator._argos_available + translator._argos_available = True + + with patch.dict( + "sys.modules", + { + "argostranslate": mock_parent, + "argostranslate.translate": mock_translate_module, + "argostranslate.package": mock_package_module, + }, + ): result = main(["--list-languages"]) + translator._argos_available = original + assert result == 0 captured = capsys.readouterr() assert "en" in captured.out @@ -578,11 +663,14 @@ class TestMain: assert result == 1 - def test_translation_failure_returns_error( - self, mock_all_translators_unavailable: None - ) -> None: - """Test that translation failure returns error code when no backends.""" - result = main(["--text", "hello", "--from", "en", "--to", "es"]) + def test_translation_failure_returns_error(self) -> None: + """Test that translation failure returns error code when argos unavailable.""" + with patch.object( + translator, + "_ensure_argos_installed", + side_effect=ImportError("argostranslate not available"), + ): + result = main(["--text", "hello", "--from", "en", "--to", 
"es"]) assert result == 1 @@ -594,9 +682,10 @@ class TestIntegration: def test_full_translation_flow(self) -> None: """Test complete translation flow.""" - with ArgosAvailableMock(["uno", "dos", "tres"]): + with ArgosAvailableMock(["uno", "dos", "tres"]) as mock: + mock.side_effect = ["uno", "dos", "tres"] words = ["one", "two", "three"] - results = translate_words(words, "en", "es") + results = translate_words(words, "en", "es", use_cache=False) assert all(r.success for r in results) assert [r.translated_word for r in results] == ["uno", "dos", "tres"] @@ -606,14 +695,19 @@ class TestIntegration: assert "one" in output assert "uno" in output - def test_mixed_success_failure( - self, mock_all_translators_unavailable: None - ) -> None: - """Test handling when no translation backends are available.""" - results = translate_words(["hello", "xyz", "world"], "en", "es") + def test_mixed_success_failure(self) -> None: + """Test handling when argos raises exception for some translations.""" + # Simulate argos translating first word, then failing, then succeeding + with ArgosAvailableMock() as mock: + mock.side_effect = ["hola", RuntimeError("Unknown"), "mundo"] + results = translate_words( + ["hello", "xyz", "world"], "en", "es", use_cache=False + ) - # All should fail when no backends available - assert all(not r.success for r in results) + # First and third succeed, second fails + assert results[0].success is True + assert results[1].success is False + assert results[2].success is True output = format_translations(results) assert "Error" in output diff --git a/python_pkg/word_frequency/translator.py b/python_pkg/word_frequency/translator.py index 89c3ed6..3d7ac53 100644 --- a/python_pkg/word_frequency/translator.py +++ b/python_pkg/word_frequency/translator.py @@ -40,6 +40,65 @@ if TYPE_CHECKING: _argos_available: bool | None = None _deep_translator_available: bool | None = None _langdetect_available: bool | None = None +_gpu_initialized: bool = False +_gpu_available: 
bool | None = None + + +def _check_cuda_available() -> bool: + """Check if CUDA is available for GPU acceleration.""" + global _gpu_available + if _gpu_available is None: + try: + import torch + _gpu_available = torch.cuda.is_available() + except ImportError: + _gpu_available = False + return _gpu_available + + +def _init_gpu_if_available() -> None: + """Initialize GPU for argostranslate if CUDA is available. + + Raises: + RuntimeError: If CUDA is available but GPU initialization fails. + """ + global _gpu_initialized + if _gpu_initialized: + return + + if not _check_cuda_available(): + _gpu_initialized = True + return + + import sys + print("CUDA detected, initializing GPU acceleration...", file=sys.stderr) + + try: + import torch + import ctranslate2 + + # Force CTranslate2 to use CUDA + device_count = torch.cuda.device_count() + if device_count == 0: + raise RuntimeError("CUDA reports available but no GPU devices found") + + device_name = torch.cuda.get_device_name(0) + print(f" Using GPU: {device_name}", file=sys.stderr) + + # Set environment variable to force GPU usage in argos + import os + os.environ["CT2_CUDA_ALLOW_FP16"] = "1" + os.environ["CT2_USE_EXPERIMENTAL_PACKED_GEMM"] = "1" + + _gpu_initialized = True + print(" GPU acceleration enabled.", file=sys.stderr) + + except Exception as e: + raise RuntimeError( + f"CUDA is available but GPU initialization failed: {e}\n" + f"This may be due to incompatible CUDA version or driver issues.\n" + f"To disable GPU and use CPU only, set environment variable: CT2_FORCE_CPU=1" + ) from e def _check_argos() -> bool: @@ -205,85 +264,184 @@ def download_languages(lang_codes: Sequence[str]) -> dict[str, bool]: return results +def _ensure_argos_installed() -> None: + """Ensure argostranslate is installed, attempt installation if not. + + Raises: + ImportError: If argos cannot be installed. + """ + if _check_argos(): + return + + import subprocess + import sys + + print("argostranslate not found. 
Attempting to install...") # noqa: T201 + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "argostranslate"], + check=True, + capture_output=True, + ) + # Reset the check flag and verify + global _argos_available # noqa: PLW0603 + _argos_available = None + if not _check_argos(): + raise ImportError("argostranslate installation succeeded but import failed") + print("argostranslate installed successfully.") # noqa: T201 + except subprocess.CalledProcessError as e: + error_msg = e.stderr.decode() if e.stderr else str(e) + raise ImportError( + f"argostranslate is required for offline translation.\n\n" + f"Install manually with one of:\n" + f" pip install argostranslate # In a virtualenv\n" + f" pipx install argostranslate # System-wide via pipx\n" + f" pacman -S python-argostranslate # Arch Linux (if available)\n\n" + f"Original error: {error_msg}" + ) from e + + +def _ensure_language_pair(from_lang: str, to_lang: str) -> None: + """Ensure the language pair is available, download if needed. + + Args: + from_lang: Source language code. + to_lang: Target language code. + + Raises: + ValueError: If language pair cannot be obtained. 
+ """ + import argostranslate.package + import argostranslate.translate + + # Check if already installed + installed_languages = argostranslate.translate.get_installed_languages() + from_lang_obj = None + to_lang_obj = None + + for lang in installed_languages: + if lang.code == from_lang: + from_lang_obj = lang + if lang.code == to_lang: + to_lang_obj = lang + + if from_lang_obj and to_lang_obj: + # Check if translation is available + translation = from_lang_obj.get_translation(to_lang_obj) + if translation: + return # Already available + + # Need to download + import sys + + print( + f"Downloading language pack: {from_lang} -> {to_lang}...", + file=sys.stderr, + ) + print(" Fetching package index...", file=sys.stderr) + argostranslate.package.update_package_index() + available = argostranslate.package.get_available_packages() + + pkg = next( + (p for p in available if p.from_code == from_lang and p.to_code == to_lang), + None, + ) + + if pkg is None: + raise ValueError( + f"No language pack available for {from_lang} -> {to_lang}. " + f"Available pairs can be listed with --list-languages." + ) + + print( + f" Downloading package (~50-100MB, this may take a minute)...", + file=sys.stderr, + ) + download_path = pkg.download() + print(" Installing language pack...", file=sys.stderr) + argostranslate.package.install_from_path(download_path) + print( + f"Language pack {from_lang} -> {to_lang} installed.", + file=sys.stderr, + ) + + def translate_word( word: str, from_lang: str, to_lang: str, + *, + use_cache: bool = True, ) -> TranslationResult: - """Translate a single word. - - Uses argostranslate if available (offline), otherwise falls back to - deep-translator (Google Translate, online). + """Translate a single word using argostranslate (offline). Args: word: The word to translate. from_lang: Source language code (e.g., 'en', 'pl', 'la'). to_lang: Target language code. + use_cache: Whether to use/update translation cache. 
Returns: TranslationResult with the translation. + + Raises: + ImportError: If argostranslate is not available and cannot be installed. """ - # Try argostranslate first (offline) - if _check_argos(): - import argostranslate.translate - + # Check cache first + if use_cache: try: - translated = argostranslate.translate.translate(word, from_lang, to_lang) - return TranslationResult( - source_word=word, - translated_word=translated, - source_lang=from_lang, - target_lang=to_lang, - success=True, - ) - except Exception as e: # noqa: BLE001 - # Fall through to try deep-translator - argos_error = str(e) - else: - argos_error = None + from python_pkg.word_frequency.cache import get_translation_cache + cache = get_translation_cache() + cached = cache.get(word, from_lang, to_lang) + if cached is not None: + return TranslationResult( + source_word=word, + translated_word=cached, + source_lang=from_lang, + target_lang=to_lang, + success=True, + ) + except ImportError: + pass # Cache not available - # Try deep-translator (online via Google Translate) - if _check_deep_translator(): - from deep_translator import GoogleTranslator + # Ensure argos is installed (will raise if it can't be) + _ensure_argos_installed() - try: - translator = GoogleTranslator(source=from_lang, target=to_lang) - translated = translator.translate(word) - return TranslationResult( - source_word=word, - translated_word=translated or "", - source_lang=from_lang, - target_lang=to_lang, - success=True, - ) - except Exception as e: # noqa: BLE001 - return TranslationResult( - source_word=word, - translated_word="", - source_lang=from_lang, - target_lang=to_lang, - success=False, - error=str(e), - ) + import argostranslate.translate - # Neither backend available - error_msg = "No translation backend available. 
Install: pip install deep-translator" - if argos_error: - error_msg = f"argostranslate error: {argos_error}" - return TranslationResult( - source_word=word, - translated_word="", - source_lang=from_lang, - target_lang=to_lang, - success=False, - error=error_msg, - ) + try: + translated = argostranslate.translate.translate(word, from_lang, to_lang) + # Cache the result + if use_cache: + try: + from python_pkg.word_frequency.cache import get_translation_cache + get_translation_cache().set(word, from_lang, to_lang, translated) + except ImportError: + pass + return TranslationResult( + source_word=word, + translated_word=translated, + source_lang=from_lang, + target_lang=to_lang, + success=True, + ) + except Exception as e: # noqa: BLE001 + return TranslationResult( + source_word=word, + translated_word="", + source_lang=from_lang, + target_lang=to_lang, + success=False, + error=str(e), + ) def translate_words( words: Sequence[str], from_lang: str, to_lang: str, + *, + use_cache: bool = True, ) -> list[TranslationResult]: """Translate multiple words. @@ -291,69 +449,187 @@ def translate_words( words: List of words to translate. from_lang: Source language code. to_lang: Target language code. + use_cache: Whether to use translation cache. Returns: List of TranslationResult for each word. """ - return [translate_word(word, from_lang, to_lang) for word in words] + return [translate_word(word, from_lang, to_lang, use_cache=use_cache) for word in words] + + +def _translate_batch_worker( + batch_words: list[str], + from_lang: str, + to_lang: str, + batch_idx: int, +) -> tuple[int, dict[str, str]]: + """Worker function to translate a batch of words. + + Args: + batch_words: Words to translate in this batch. + from_lang: Source language code. + to_lang: Target language code. + batch_idx: Index of this batch (for ordering results). + + Returns: + Tuple of (batch_idx, translations dict). 
+ """ + import argostranslate.translate + + translations: dict[str, str] = {} + + # Batch translate by joining with newlines + batch_text = "\n".join(batch_words) + translated_batch = argostranslate.translate.translate( + batch_text, from_lang, to_lang + ) + translated_words = translated_batch.split("\n") + + # If we got the same number of translations, use them + if len(translated_words) == len(batch_words): + for word, trans in zip(batch_words, translated_words, strict=True): + translations[word.lower()] = trans.strip() + else: + # Fall back to individual translation for this batch + for word in batch_words: + translated = argostranslate.translate.translate( + word, from_lang, to_lang + ) + translations[word.lower()] = translated + + return batch_idx, translations def translate_words_batch( words: Sequence[str], from_lang: str, to_lang: str, + *, + use_cache: bool = True, ) -> list[TranslationResult]: - """Translate multiple words, attempting batch translation for efficiency. + """Translate multiple words using argostranslate (offline). - For better results with context, this joins words and translates together, - then splits. Falls back to word-by-word if batch fails. + Uses small batch translation for efficiency with frequent progress updates. + Requires argostranslate. Will use GPU if CUDA is available. Args: words: List of words to translate. from_lang: Source language code. to_lang: Target language code. + use_cache: Whether to use translation cache. Returns: List of TranslationResult for each word. + + Raises: + ImportError: If argostranslate is not available and cannot be installed. + RuntimeError: If CUDA is available but GPU initialization fails. 
""" if not words: return [] - # For single words or small batches, just translate individually - if len(words) <= 3: - return translate_words(words, from_lang, to_lang) + # Ensure argos is installed (will raise if it can't be) + _ensure_argos_installed() + + # Initialize GPU if available (will raise if CUDA available but fails) + _init_gpu_if_available() - # Try batch translation by joining with newlines - if not _check_argos(): - return translate_words(words, from_lang, to_lang) + # Ensure language pair is available + _ensure_language_pair(from_lang, to_lang) - import argostranslate.translate + # Check cache for already-translated words + cached_results: dict[str, str] = {} + words_to_translate: list[str] = [] - try: - # Join words with newlines for batch translation - batch_text = "\n".join(words) - translated_batch = argostranslate.translate.translate( - batch_text, from_lang, to_lang + if use_cache: + try: + from python_pkg.word_frequency.cache import get_translation_cache + cache = get_translation_cache() + cached_results = cache.get_many(list(words), from_lang, to_lang) + except ImportError: + pass + + # Find words that still need translation + for word in words: + if word.lower() not in cached_results: + words_to_translate.append(word) + + # Translate uncached words using argos batch + new_translations: dict[str, str] = {} + if words_to_translate: + import sys + + num_to_translate = len(words_to_translate) + + # Check if GPU is being used + gpu_status = " (GPU)" if _gpu_available else " (CPU)" + print( + f"Translating {num_to_translate} words from {from_lang} to {to_lang}{gpu_status}...", + file=sys.stderr, + flush=True, ) - translated_words = translated_batch.split("\n") - # If we got the same number of translations, use them - if len(translated_words) == len(words): - return [ - TranslationResult( - source_word=word, - translated_word=trans.strip(), - source_lang=from_lang, - target_lang=to_lang, - success=True, + try: + # Split into batches - larger 
batches are faster but show progress less often + BATCH_SIZE = 100 + batches: list[list[str]] = [] + for i in range(0, num_to_translate, BATCH_SIZE): + batches.append(words_to_translate[i:i + BATCH_SIZE]) + + total_batches = len(batches) + + # Sequential translation with progress + # (argostranslate is not thread-safe - uses global model) + for batch_idx, batch_words in enumerate(batches): + words_done = (batch_idx + 1) * BATCH_SIZE + words_done = min(words_done, num_to_translate) + pct = int(words_done / num_to_translate * 100) + + print( + f" [{pct:3d}%] Translating batch {batch_idx + 1}/{total_batches} " + f"({words_done}/{num_to_translate} words)...", + file=sys.stderr, + flush=True, ) - for word, trans in zip(words, translated_words, strict=True) - ] - except Exception: # noqa: BLE001, S110 - pass + + _, batch_translations = _translate_batch_worker( + batch_words, from_lang, to_lang, batch_idx + ) + new_translations.update(batch_translations) + + print(f" Translation complete.", file=sys.stderr, flush=True) + except Exception as e: # noqa: BLE001 + raise RuntimeError( + f"Translation failed for {from_lang} -> {to_lang}: {e}" + ) from e - # Fall back to individual translation - return translate_words(words, from_lang, to_lang) + # Cache new translations + if use_cache and new_translations: + try: + from python_pkg.word_frequency.cache import get_translation_cache + get_translation_cache().set_many(new_translations, from_lang, to_lang) + except ImportError: + pass + + # Merge cached and new translations + all_translations = {**cached_results, **new_translations} + + # Build results in original order + results: list[TranslationResult] = [] + for word in words: + translation = all_translations.get(word.lower(), "") + results.append( + TranslationResult( + source_word=word, + translated_word=translation, + source_lang=from_lang, + target_lang=to_lang, + success=bool(translation), + error=None if translation else "Translation failed", + ) + ) + + return results def 
format_translations( @@ -551,7 +827,12 @@ def main(argv: Sequence[str] | None = None) -> int: return 1 # Translate - results = translate_words_batch(words, args.from_lang, args.to_lang) + try: + results = translate_words_batch(words, args.from_lang, args.to_lang) + except ImportError as e: + print(f"Error: {e}", file=sys.stderr) # noqa: T201 + return 1 + output = format_translations(results) # Output