praca_magisterska/pytania/generate_anki_v2.py

184 lines
5.9 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Generate Anki flashcards from exam questions in odpowiedzi/ folder.
Creates a tab-separated file compatible with Anki import.
"""
import os
import re
from pathlib import Path
def extract_main_question(content, filename):
"""Extract the main exam question from the file."""
# Extract the main question from ## Pytanie section
question_match = re.search(r'## Pytanie\s*\n\s*\*\*["\']?(.+?)["\']?\*\*', content, re.DOTALL)
if question_match:
main_question = question_match.group(1).strip()
main_question = re.sub(r'\s+', ' ', main_question)
return main_question
# Fallback to title
title_match = re.search(r'^# (.+)$', content, re.MULTILINE)
return title_match.group(1) if title_match else filename
def extract_subject(content):
"""Extract the subject code."""
subject_match = re.search(r'Przedmiot:\s*(\w+)', content)
return subject_match.group(1) if subject_match else "Ogólne"
def extract_key_points(content):
"""Extract key points from the main answer section."""
points = []
# Look for main answer section
main_answer = re.search(r'## 📚 Odpowiedź główna\s*\n(.+?)(?=\n## [^<5E>]|\n---\s*\n## |\Z)', content, re.DOTALL)
if not main_answer:
return points
answer_text = main_answer.group(1)
# Extract ### headers as key points
headers = re.findall(r'^### (.+)$', answer_text, re.MULTILINE)
for h in headers[:6]:
# Clean header
h = re.sub(r'\d+\.\s*', '', h).strip()
if h and len(h) > 3:
points.append(h)
return points
def extract_definitions(content):
"""Extract key definitions from the content."""
definitions = []
# Pattern for **Term** - definition or **Term**: definition
pattern = r'\*\*([^*\n]+)\*\*\s*[-:]\s*([^*\n]{20,150})'
matches = re.findall(pattern, content)
for term, definition in matches:
term = term.strip()
definition = definition.strip()
# Filter out non-definition patterns
if term and definition and not term.startswith('Przykład') and not term.startswith('Uwaga'):
definitions.append((term, definition))
return definitions[:5]
def clean_html(text):
"""Convert markdown to HTML and clean for Anki."""
if not text:
return ""
# Replace markdown bold/italic with HTML
text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
# Clean up special characters
text = text.replace('\t', ' ')
text = text.replace('"', '&quot;')
# Handle newlines - convert to <br>
text = text.replace('\n', ' ')
text = re.sub(r'\s+', ' ', text)
return text.strip()
def process_file(filepath):
"""Process a single file and return flashcards."""
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
cards = []
# Extract metadata
filename = os.path.basename(filepath)
match = re.match(r'(\d+)-(.+)\.md', filename)
if match:
num = match.group(1)
topic = match.group(2).replace('-', '_')
else:
num = "00"
topic = "unknown"
subject = extract_subject(content)
main_question = extract_main_question(content, filename)
# Base tags for this question
base_tags = f"egzamin_magisterski pytanie_{num} {subject}"
# Card 1: Main question with key points
key_points = extract_key_points(content)
if key_points:
answer = "<ul>" + "".join([f"<li>{clean_html(p)}</li>" for p in key_points]) + "</ul>"
cards.append({
'front': clean_html(main_question),
'back': answer,
'tags': base_tags
})
# Card 2+: Key definitions as individual cards
definitions = extract_definitions(content)
for term, definition in definitions:
q = f"Definicja: {term}"
a = clean_html(definition)
cards.append({
'front': q,
'back': a,
'tags': f"{base_tags} definicje"
})
return cards
def main():
odpowiedzi_dir = Path("/home/kuchy/praca_magisterska/pytania/odpowiedzi")
output_file = Path("/home/kuchy/praca_magisterska/pytania/anki_egzamin_magisterski.txt")
all_cards = []
# Process each file
for md_file in sorted(odpowiedzi_dir.glob("*.md")):
print(f"Processing: {md_file.name}")
try:
cards = process_file(md_file)
all_cards.extend(cards)
print(f" -> {len(cards)} cards")
except Exception as e:
print(f" -> Error: {e}")
import traceback
traceback.print_exc()
# Write Anki-compatible file
with open(output_file, 'w', encoding='utf-8') as f:
# File headers for Anki
f.write("#separator:tab\n")
f.write("#html:true\n")
f.write("#tags column:3\n")
f.write("#deck:Egzamin Magisterski ISY\n")
f.write("#notetype:Basic\n")
f.write("\n")
for card in all_cards:
front = card['front']
back = card['back']
tags = card['tags']
# Ensure no tabs in content
front = front.replace('\t', ' ')
back = back.replace('\t', ' ')
f.write(f"{front}\t{back}\t{tags}\n")
print(f"\n✅ Created {len(all_cards)} flashcards")
print(f"📁 Output: {output_file}")
print("\n=== Import Instructions ===")
print("1. Open Anki desktop → File → Import")
print("2. Select: anki_egzamin_magisterski.txt")
print("3. Set 'Fields separated by: Tab'")
print("4. Check 'Allow HTML in fields'")
print("5. Map: Field 1 → Front, Field 2 → Back, Field 3 → Tags")
print("6. Click Import")
print("\nFor AnkiWeb/AnkiDroid: Sync after importing on desktop")
if __name__ == "__main__":
main()