mirror of
https://github.com/kuhyx/praca_magisterska.git
synced 2026-07-04 13:43:05 +02:00
289 lines
11 KiB
Python
289 lines
11 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
Generate comprehensive Anki flashcards from exam questions.
|
|||
|
|
Creates tab-separated file for Anki import with proper HTML formatting.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import re
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
def clean_text(text):
    """Convert a markdown fragment into the HTML subset used on the cards.

    Steps, in order: ``**bold**`` becomes ``<b>``, ``*italic*`` becomes
    ``<i>``, tabs become spaces, double quotes become ``&quot;``, runs of
    spaces are collapsed to one and the result is stripped. Newlines are
    left untouched so callers can decide how to render line breaks.

    Args:
        text: Markdown text; any falsy value (``None``, ``""``) is accepted.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    # Convert markdown emphasis to HTML. Bold must run first so that the
    # single-asterisk rule does not see the ``**`` markers.
    text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
    text = re.sub(r'(?<!\*)\*([^*]+)\*(?!\*)', r'<i>\1</i>', text)

    # Tabs are the field separator of the export file.
    text = text.replace('\t', ' ')
    # Escape double quotes as an HTML entity: the previous replacement mapped
    # '"' onto itself (a no-op), leaving raw quotes that a tab-separated
    # importer may interpret as field quoting.
    text = text.replace('"', '&quot;')

    # Collapse runs of spaces only (not newlines) and trim the ends.
    text = re.sub(r' +', ' ', text)
    return text.strip()
|
|||
|
|
|
|||
|
|
def format_list(items, numbered=False):
    """Render ``items`` as an HTML list.

    Each item is passed through ``clean_text``; items that clean to an empty
    string are left out. An ordered list (``<ol>``) is produced when
    ``numbered`` is true, otherwise an unordered one (``<ul>``).

    Returns:
        The list markup, or ``""`` when ``items`` is empty or ``None``.
    """
    if not items:
        return ""

    tag = "ol" if numbered else "ul"
    cleaned_entries = (clean_text(entry) for entry in items)
    body = "".join(f"<li>{entry}</li>" for entry in cleaned_entries if entry)
    return f"<{tag}>{body}</{tag}>"
|
|||
|
|
|
|||
|
|
def _main_question_cards(content, base_tags):
    """Build the 'main exam question' card (question -> list of key topics)."""
    q_match = re.search(r'## Pytanie\s*\n\s*\*\*["\']?(.+?)["\']?\*\*', content, re.DOTALL)
    if not q_match:
        return []
    main_q = re.sub(r'\s+', ' ', q_match.group(1).strip())

    # NOTE(review): the character class "[<5B>🎯]" looks like it contains a
    # garbled/escaped emoji; it is kept verbatim - confirm against the notes.
    answer_match = re.search(
        r'## 📚 Odpowiedź główna\s*\n(.+?)(?=\n## [<5B>🎯]|\n---\s*\n## |\Z)',
        content,
        re.DOTALL,
    )
    if not answer_match:
        return []

    # The H3 headings of the main answer become the bullet list on the back.
    headers = re.findall(r'^### (?:\d+\.\s*)?(.+)$', answer_match.group(1), re.MULTILINE)
    headers = [h.strip() for h in headers if len(h.strip()) > 3][:6]
    if not headers:
        return []

    return [{
        'front': clean_text(main_q),
        'back': "<b>Kluczowe zagadnienia:</b>" + format_list(headers),
        'tags': f"{base_tags} pytanie_glowne"
    }]


def _section_cards(content, base_tags):
    """Build one detail card per sufficiently long ``###`` section."""
    # The body runs up to the next H3 heading. The lookahead includes the
    # trailing space ("### ") so that "####" sub-headers stay inside the body;
    # without it the sub-header extraction below could never find anything.
    sections = re.findall(
        r'^### (?:\d+\.\s*)?(.+?)\n((?:(?!^### ).)+)',
        content,
        re.MULTILINE | re.DOTALL
    )

    cards = []
    for header, body in sections:
        header = header.strip()
        body = body.strip()

        # Skip very short sections and example sections.
        if len(body) < 50 or header.lower().startswith('przykład'):
            continue

        answer_parts = []

        # Up to four "####" sub-headers.
        answer_parts.extend(re.findall(r'^#### (.+)$', body, re.MULTILINE)[:4])

        # Up to five bullets of the form "- **term**: description".
        bullets = re.findall(r'[-•]\s*\*\*([^*]+)\*\*[:\s-]*([^\n]+)?', body)
        for term, desc in bullets[:5]:
            if desc:
                answer_parts.append(f"<b>{term}</b>: {desc.strip()}")
            else:
                answer_parts.append(f"<b>{term}</b>")

        # No structured content: fall back to the first plain paragraph
        # (code fences and table rows are ignored), capped at 300 characters.
        if not answer_parts:
            paras = [p.strip() for p in body.split('\n\n')
                     if p.strip() and not p.strip().startswith('```')
                     and not p.strip().startswith('|')]
            if paras:
                first = paras[0]
                if len(first) > 300:
                    first = first[:300] + "..."
                answer_parts.append(first)

        if not answer_parts:
            continue

        # Phrase the question according to the kind of heading.
        if "Definicja" in header or "Co to" in header:
            q = f"Co to jest: {header.replace('Definicja', '').strip()}?"
        elif "Charakterystyka" in header:
            q = f"Scharakteryzuj: {header.replace('Charakterystyka', '').strip()}"
        elif header.endswith('?'):
            q = header
        else:
            q = f"Omów: {header}"

        if len(answer_parts) > 1:
            answer_html = format_list(answer_parts)
        else:
            answer_html = clean_text(answer_parts[0])

        cards.append({
            'front': clean_text(q),
            'back': answer_html,
            'tags': f"{base_tags} szczegoly"
        })
    return cards


def _complexity_cards(content, base_tags):
    """Build complexity cards, named after the nearest preceding H3 heading."""
    algo_patterns = [
        r'#### Złożoność(?:\s+czasowa)?\s*\n(.+?)(?=\n####|\n###|\Z)',
        r'Złożoność:\s*\*\*([^*]+)\*\*',
    ]

    cards = []
    for pattern in algo_patterns:
        # Only the first two hits of each pattern are considered.
        for hit in list(re.finditer(pattern, content, re.DOTALL))[:2]:
            complexity = hit.group(1)
            if len(complexity) <= 10:
                continue

            # Use the LAST H3 heading before this hit. Searching the prefix
            # with re.search returned the first heading of the whole file, so
            # every complexity card was attributed to the same topic.
            preceding = re.findall(
                r'^### (?:\d+\.\s*)?(.+)$', content[:hit.start()], re.MULTILINE
            )
            if not preceding:
                continue

            algo_name = preceding[-1].strip()
            cards.append({
                'front': f"Jaka jest złożoność algorytmu/metody: {algo_name}?",
                'back': clean_text(complexity.strip()[:200]),
                'tags': f"{base_tags} zlozonosc"
            })
            # At most one card per pattern.
            break
    return cards


def _comparison_cards(content, base_tags, num):
    """Build a comparison-table card from the first comparison H2 section."""
    # "[^\n]*" keeps the heading match on a single line. With ".*" under
    # re.DOTALL the heading could stretch across the whole file and the table
    # rows were then read from an unrelated, later section.
    compare_match = re.search(
        r'## [^\n]*(Porównanie|Zestawienie|vs)[^\n]*\n(.+?)(?=\n## |\Z)',
        content,
        re.DOTALL | re.IGNORECASE,
    )
    if not compare_match:
        return []

    # Table rows whose first cell is bold: "| **aspect** | value |".
    items = re.findall(r'\|\s*\*\*([^|*]+)\*\*\s*\|([^|]+)\|', compare_match.group(2))
    if not items:
        return []

    # The card is only emitted when the heading names the compared things.
    title_match = re.search(
        r'## .*(Porównanie|Zestawienie).*?(\w+.*?(?:vs|i|oraz).*?\w+)',
        compare_match.group(0),
        re.IGNORECASE,
    )
    if not title_match:
        return []

    rows = "".join(
        f"<tr><td>{clean_text(aspect)}</td><td>{clean_text(value)}</td></tr>"
        for aspect, value in items[:6]
    )
    comparison_html = "<table><tr><th>Aspekt</th><th>Wartość</th></tr>" + rows + "</table>"

    return [{
        'front': f"Porównaj kluczowe różnice w temacie: pytanie {num}",
        'back': comparison_html,
        'tags': f"{base_tags} porownanie"
    }]


def _practice_cards(content, base_tags):
    """Build up to three Q&A cards from the practice-questions section."""
    qa_section = re.search(r'## 🎓 Pytania.*?\n(.+?)(?=\n## |\Z)', content, re.DOTALL)
    if not qa_section:
        return []

    qas = re.findall(
        r'### Q\d+:?\s*["\']?(.+?)["\']?\s*\n.*?Odpowiedź:\s*\n?(.+?)(?=\n### |\Z)',
        qa_section.group(1),
        re.DOTALL,
    )

    cards = []
    for q, a in qas[:3]:
        q = re.sub(r'\s+', ' ', q.strip())
        a = a.strip()
        if len(a) <= 30:
            continue

        # Keep the answer short: first five lines, at most 400 characters.
        a_short = '\n'.join(a.split('\n')[:5])
        if len(a_short) > 400:
            a_short = a_short[:400] + "..."

        cards.append({
            'front': clean_text(q),
            'back': clean_text(a_short).replace('\n', '<br>'),
            'tags': f"{base_tags} egzamin_praktyka"
        })
    return cards


def extract_from_file(filepath):
    """Extract flashcard data from a markdown answer file.

    The file is read as UTF-8 and scanned for five kinds of cards: the main
    exam question, one card per ``###`` section, complexity cards, a
    comparison table and practice Q&A pairs.

    Args:
        filepath: Path (``str`` or path-like) of the markdown file. A name of
            the form ``<number>-<slug>.md`` supplies the question number used
            in the tags; otherwise ``"00"`` is used.

    Returns:
        A list of dicts with the keys ``'front'``, ``'back'`` and ``'tags'``,
        in the order listed above. The list is empty when nothing matches.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Question number from the file name, subject from the file body.
    name_match = re.match(r'(\d+)-(.+)\.md', os.path.basename(filepath))
    num = name_match.group(1) if name_match else "00"

    subj_match = re.search(r'Przedmiot:\s*(\w+)', content)
    subject = subj_match.group(1) if subj_match else "Ogólne"

    base_tags = f"egzamin_magisterski pyt{num} {subject}"

    cards = []
    cards.extend(_main_question_cards(content, base_tags))
    cards.extend(_section_cards(content, base_tags))
    cards.extend(_complexity_cards(content, base_tags))
    cards.extend(_comparison_cards(content, base_tags, num))
    cards.extend(_practice_cards(content, base_tags))
    return cards
|
|||
|
|
|
|||
|
|
def main(odpowiedzi_dir=None, output_file=None):
    """Generate the Anki import file from every ``*.md`` answer file.

    Each markdown file in ``odpowiedzi_dir`` is passed to
    ``extract_from_file``; cards with a front text that was already seen are
    dropped, and the remainder is written as a tab-separated file preceded by
    Anki file headers. Progress and import instructions are printed.

    Args:
        odpowiedzi_dir: Directory with the markdown answers. Defaults to the
            original hard-coded location.
        output_file: Destination of the tab-separated file. Defaults to the
            original hard-coded location.
    """
    if odpowiedzi_dir is None:
        odpowiedzi_dir = "/home/kuchy/praca_magisterska/pytania/odpowiedzi"
    if output_file is None:
        output_file = "/home/kuchy/praca_magisterska/pytania/anki_egzamin_magisterski.txt"
    odpowiedzi_dir = Path(odpowiedzi_dir)
    output_file = Path(output_file)

    def as_field(value):
        # One card per line, fields separated by tabs: a raw tab or newline
        # inside a field would split the record. Tabs become spaces and line
        # breaks become <br>, which renders as a break since HTML is enabled.
        value = value.replace('\t', ' ')
        return value.replace('\r\n', '<br>').replace('\n', '<br>').replace('\r', '<br>')

    all_cards = []
    for md_file in sorted(odpowiedzi_dir.glob("*.md")):
        print(f"Processing: {md_file.name}", end=" ")
        try:
            cards = extract_from_file(md_file)
        except Exception as e:
            # Deliberate best-effort: one unreadable file must not stop the
            # whole export, so the error is reported and the file is skipped.
            print(f"→ ERROR: {e}")
        else:
            all_cards.extend(cards)
            print(f"→ {len(cards)} cards")

    # Remove potential duplicates (same front); the first occurrence wins.
    seen = set()
    unique_cards = []
    for card in all_cards:
        if card['front'] not in seen:
            seen.add(card['front'])
            unique_cards.append(card)

    lines = [
        # Anki file headers
        "#separator:tab\n",
        "#html:true\n",
        "#tags column:3\n",
        "#deck:Egzamin Magisterski ISY\n",
        "#notetype:Basic\n",
        "\n",
    ]
    lines.extend(
        f"{as_field(card['front'])}\t{as_field(card['back'])}\t{card['tags']}\n"
        for card in unique_cards
    )
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"\n{'='*50}")
    print(f"✅ Generated {len(unique_cards)} unique flashcards")
    print(f"📁 Saved to: {output_file}")
    print(f"{'='*50}")
    print("\n📋 IMPORT INSTRUCTIONS:")
    print("─" * 40)
    print("Anki Desktop:")
    print(" 1. File → Import")
    print(" 2. Select: anki_egzamin_magisterski.txt")
    print(" 3. Verify: Fields separated by Tab")
    print(" 4. Check: Allow HTML in fields")
    print(" 5. Click Import")
    print()
    print("AnkiWeb / AnkiDroid:")
    print(" 1. First import on Anki Desktop")
    print(" 2. Click Sync to upload to AnkiWeb")
    print(" 3. Sync on mobile to download")
|
|||
|
|
|
|||
|
|
# Run the export only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|