#!/usr/bin/env python3
"""
Generate Anki flashcards from exam questions in odpowiedzi/ folder.
Creates a tab-separated file compatible with Anki import.
"""
import os
import re
from pathlib import Path
def extract_main_question(content, filename):
    """Return the text to use as the main exam question for one file.

    The bold text that directly follows the ``## Pytanie`` heading is
    preferred; one optional surrounding quote character is dropped and runs
    of whitespace (including line breaks) are collapsed to single spaces.
    When that section is absent, the first ``# `` title line is returned
    as-is, and when there is no title either, *filename* is returned.
    """
    pytanie = re.search(r'## Pytanie\s*\n\s*\*\*["\']?(.+?)["\']?\*\*', content, re.DOTALL)
    if pytanie is None:
        # No question section: fall back to the document title, then the name.
        heading = re.search(r'^# (.+)$', content, re.MULTILINE)
        if heading is None:
            return filename
        return heading.group(1)

    raw_question = pytanie.group(1).strip()
    return re.sub(r'\s+', ' ', raw_question)
def extract_subject(content):
    """Return the subject code that follows ``Przedmiot:`` in *content*.

    Only the first run of word characters after the label is taken. When the
    label does not occur, the generic subject ``"Ogólne"`` is returned.
    """
    found = re.search(r'Przedmiot:\s*(\w+)', content)
    if found is None:
        return "Ogólne"
    return found.group(1)
def extract_key_points(content):
    """Extract key points from the main answer section.

    The section starts at the ``## 📚 Odpowiedź główna`` heading and ends at
    the next ``## `` heading, at a ``---`` rule followed by a ``## `` heading,
    or at the end of *content*. The first six ``###`` sub-headings inside the
    section are considered. Leading outline numbering (``1.``, ``2.3.``) is
    removed from each one, and headings of three characters or fewer are
    dropped.

    Args:
        content: Full markdown text of one answer file.

    Returns:
        A list of at most six cleaned heading strings. The list is empty when
        the section is missing or contains no usable sub-headings.
    """
    # NOTE(review): the character class after "## " excludes only U+FFFD,
    # exactly as the literal in the original pattern did. It is presumably a
    # mis-encoded emoji -- confirm the intended character.
    section = re.search(
        r'## 📚 Odpowiedź główna\s*\n(.+?)(?=\n## [^\ufffd]|\n---\s*\n## |\Z)',
        content,
        re.DOTALL,
    )
    if section is None:
        return []

    points = []
    headings = re.findall(r'^### (.+)$', section.group(1), re.MULTILINE)
    # The cap is applied before filtering, so dropped headings are not replaced.
    for heading in headings[:6]:
        # Strip numbering only at the start of the heading. An unanchored
        # pattern would also eat digits inside the text ("Wersja 2.0 ...").
        cleaned = re.sub(r'^\s*(?:\d+\.\s*)+', '', heading).strip()
        if len(cleaned) > 3:
            points.append(cleaned)
    return points
def extract_definitions(content):
    """Extract key definitions from the content.

    A definition is a bold term followed, on the same line, by a separator
    (hyphen, en dash, em dash or colon) and 20 to 150 characters of text
    containing no ``*``. Text beyond 150 characters is cut off by the
    pattern. Terms starting with ``Przykład`` or ``Uwaga`` are skipped
    because they introduce examples and notes rather than definitions.

    Args:
        content: Full markdown text of one answer file.

    Returns:
        A list of at most five ``(term, definition)`` tuples, both stripped
        of surrounding whitespace, in order of appearance.
    """
    # Only spaces and tabs are allowed around the separator. With ``\s*`` a
    # bold heading followed by a "- bullet" on the next line was captured as
    # a definition.
    pattern = r'\*\*([^*\n]+)\*\*[ \t]*[-–—:][ \t]*([^*\n]{20,150})'
    skipped_prefixes = ('Przykład', 'Uwaga')

    definitions = []
    for term, definition in re.findall(pattern, content):
        term = term.strip()
        definition = definition.strip()
        # A match made only of whitespace strips to an empty string; drop it.
        if term and definition and not term.startswith(skipped_prefixes):
            definitions.append((term, definition))
    return definitions[:5]
def clean_html(text):
    """Convert markdown to HTML and clean for Anki.

    Bold and italic markdown become ``<b>`` and ``<i>`` tags, tabs become
    spaces (the tab is the field separator of the export file), double quotes
    become ``&quot;``, line breaks become ``<br>``, and any remaining runs of
    whitespace are collapsed to a single space.

    Args:
        text: Markdown fragment, or a falsy value such as ``None`` or ``""``.

    Returns:
        The cleaned single-line HTML string; ``""`` for falsy input.
    """
    if not text:
        return ""

    # Bold must be handled before italic, otherwise the single-asterisk
    # pattern would consume the double-asterisk markers.
    text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
    text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)

    # Neutralise characters that have a meaning in the tab-separated file.
    text = text.replace('\t', ' ')
    text = text.replace('"', '&quot;')

    # Keep line structure visible on the card, then squeeze leftover whitespace.
    text = text.replace('\n', '<br>')
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def process_file(filepath):
"""Process a single file and return flashcards."""
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
cards = []
# Extract metadata
filename = os.path.basename(filepath)
match = re.match(r'(\d+)-(.+)\.md', filename)
if match:
num = match.group(1)
topic = match.group(2).replace('-', '_')
else:
num = "00"
topic = "unknown"
subject = extract_subject(content)
main_question = extract_main_question(content, filename)
# Base tags for this question
base_tags = f"egzamin_magisterski pytanie_{num} {subject}"
# Card 1: Main question with key points
key_points = extract_key_points(content)
if key_points:
answer = "