mirror of
https://github.com/kuhyx/praca_magisterska.git
synced 2026-07-04 12:03:01 +02:00
1638 lines
69 KiB
Python
1638 lines
69 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Generate all diagrams for PYTANIE 23: Segmentacja obrazu.
|
||
|
||
A4-compatible, monochrome-friendly (grays + one accent), 300 DPI.
|
||
"""
|
||
|
||
import matplotlib
|
||
matplotlib.use('Agg')
|
||
import matplotlib.pyplot as plt
|
||
import matplotlib.patches as mpatches
|
||
import matplotlib.patches as patches
|
||
from matplotlib.patches import FancyArrowPatch, FancyBboxPatch
|
||
import numpy as np
|
||
import os
|
||
|
||
# Output resolution; passed as dpi= to the plt.savefig() calls below.
DPI = 300
# Figures are written to an "img" directory located next to this script.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'img')
# Module-level side effect: the directory is created as soon as the module
# is imported/executed (no error if it already exists).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Color palette — monochrome-friendly
BLACK = '#000000'
WHITE = '#FFFFFF'
# GRAY1..GRAY6 run from lightest to darkest.
GRAY1 = '#F5F5F5'
GRAY2 = '#E0E0E0'
GRAY3 = '#BDBDBD'
GRAY4 = '#9E9E9E'
GRAY5 = '#757575'
GRAY6 = '#424242'
ACCENT = '#4A90D9'  # single blue accent for highlights
ACCENT_LIGHT = '#B3D4FC'
RED_ACCENT = '#D32F2F'
GREEN_ACCENT = '#388E3C'

# Font sizes shared by all figures (passed as fontsize= to matplotlib).
FS = 9
FS_TITLE = 11
FS_SMALL = 7
FS_TINY = 6
|
||
|
||
|
||
# ============================================================
|
||
# 1. OTSU — Bimodal histogram + within-class variance
|
||
# ============================================================
|
||
def generate_otsu_bimodal():
    """Render the three-panel Otsu thresholding figure.

    Panel 1 plots a synthetic bimodal histogram with a threshold line drawn
    at T=128, panel 2 walks through a within-class variance calculation as
    text, and panel 3 contrasts two tightly clustered classes with two
    overlapping ones.  The figure is written to ``q23_otsu_bimodal.png``
    inside ``OUTPUT_DIR``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 3.5))

    # --- Panel 1: Bimodal histogram ---
    ax = axes[0]
    np.random.seed(42)  # fixed seed keeps the generated figure reproducible
    dark = np.random.normal(60, 20, 3000).clip(0, 255)
    bright = np.random.normal(190, 25, 2000).clip(0, 255)
    all_pixels = np.concatenate([dark, bright])

    counts, _, _ = ax.hist(all_pixels, bins=64, color=GRAY3, edgecolor=GRAY5, linewidth=0.5)
    peak = max(counts)  # tallest bin; every vertical offset below is relative to it
    threshold = 128
    ax.axvline(x=threshold, color=RED_ACCENT, linewidth=2, linestyle='--', label='Próg Otsu T=128')
    ax.fill_betweenx([0, peak*1.1], 0, threshold, alpha=0.12, color=ACCENT)
    ax.fill_betweenx([0, peak*1.1], threshold, 255, alpha=0.12, color=RED_ACCENT)
    ax.text(45, peak*0.85, 'Klasa 0\n(tło)', ha='center', fontsize=FS, fontweight='bold', color=ACCENT)
    ax.text(195, peak*0.85, 'Klasa 1\n(obiekt)', ha='center', fontsize=FS, fontweight='bold', color=RED_ACCENT)
    ax.annotate('Garb 1', xy=(60, peak*0.6), fontsize=FS_SMALL, ha='center',
                arrowprops=dict(arrowstyle='->', color=GRAY5), xytext=(30, peak*0.45))
    ax.annotate('Garb 2', xy=(190, peak*0.5), fontsize=FS_SMALL, ha='center',
                arrowprops=dict(arrowstyle='->', color=GRAY5), xytext=(220, peak*0.35))
    ax.set_xlabel('Jasność piksela (0–255)', fontsize=FS)
    ax.set_ylabel('Liczba pikseli', fontsize=FS)
    ax.set_title('Histogram bimodalny', fontsize=FS_TITLE, fontweight='bold')
    ax.legend(fontsize=FS_SMALL, loc='upper right')
    ax.set_xlim(0, 255)

    # --- Panel 2: Within-class variance explanation ---
    ax = axes[1]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Wariancja wewnątrzklasowa', fontsize=FS_TITLE, fontweight='bold')

    y = 9.2
    # Each entry is (text, font size, color, weight); an empty text is a spacer.
    # The class-0 variance is 530/5 = 106 for the listed values (mean 48), and
    # the weighted sum is therefore 0.6*106 + 0.4*100 = 103.6.
    texts = [
        ('Wariancja = jak bardzo wartości\nróżnią się od średniej', FS, 'black', 'normal'),
        ('', 0, 'black', 'normal'),
        ('Klasa 0 (piksele ≤ T):', FS, ACCENT, 'bold'),
        (' wartości: 30, 50, 45, 60, 55', FS_SMALL, 'black', 'normal'),
        (' średnia μ₀ = 48', FS_SMALL, 'black', 'normal'),
        (' σ₀² = ((30-48)²+(50-48)²+...)/5 = 106', FS_SMALL, 'black', 'normal'),
        ('', 0, 'black', 'normal'),
        ('Klasa 1 (piksele > T):', FS, RED_ACCENT, 'bold'),
        (' wartości: 180, 200, 190, 210, 195', FS_SMALL, 'black', 'normal'),
        (' średnia μ₁ = 195', FS_SMALL, 'black', 'normal'),
        (' σ₁² = ((180-195)²+...)/5 = 100', FS_SMALL, 'black', 'normal'),
        ('', 0, 'black', 'normal'),
        ('σ²_wewnątrz = w₀·σ₀² + w₁·σ₁²', FS, BLACK, 'bold'),
        ('= 0.6·106 + 0.4·100 = 103.6', FS_SMALL, 'black', 'normal'),
        ('', 0, 'black', 'normal'),
        ('Otsu próbuje KAŻDE T: 0,1,...,255', FS_SMALL, GREEN_ACCENT, 'bold'),
        ('Wybiera T dające MINIMUM σ²_wewnątrz', FS_SMALL, GREEN_ACCENT, 'bold'),
    ]
    for txt, size, color, weight in texts:
        if not txt:
            y -= 0.25
            continue
        # Positions are in data coordinates (the 0..10 box set above).  No
        # transform= is passed: an explicit transform=None would make the
        # text fall back to raw display coordinates.
        ax.text(0.3, y, txt, fontsize=size, color=color, fontweight=weight, va='top')
        y -= 0.55

    # --- Panel 3: homogeneity explanation ---
    ax = axes[2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('"Jednorodne" = małe σ²', fontsize=FS_TITLE, fontweight='bold')

    # Draw two clusters
    np.random.seed(7)
    # Good separation (the order of the random draws is kept as-is so the
    # scatter positions do not change)
    c0 = np.random.normal(2, 0.4, 15)
    c1 = np.random.normal(7, 0.4, 15)
    y_pos_0 = np.random.uniform(6, 8, 15)
    y_pos_1 = np.random.uniform(6, 8, 15)
    ax.scatter(c0, y_pos_0, c=ACCENT, s=30, zorder=5, label='Klasa 0')
    ax.scatter(c1, y_pos_1, c=RED_ACCENT, s=30, zorder=5, label='Klasa 1')
    ax.axvline(x=4.5, color=GREEN_ACCENT, linewidth=2, linestyle='--')
    ax.text(4.5, 8.8, 'T optymalny', ha='center', fontsize=FS_SMALL, color=GREEN_ACCENT, fontweight='bold')
    ax.text(2, 5.3, 'σ₀² mała\n(skupione)', ha='center', fontsize=FS_SMALL, color=ACCENT)
    ax.text(7, 5.3, 'σ₁² mała\n(skupione)', ha='center', fontsize=FS_SMALL, color=RED_ACCENT)
    ax.text(5, 4, '→ σ²_wewnątrz MINIMALNA\n→ klasy JEDNORODNE\n→ dobra segmentacja!',
            ha='center', fontsize=FS, fontweight='bold', color=GREEN_ACCENT)

    # Bad separation
    c0b = np.random.normal(3.5, 1.5, 15)
    c1b = np.random.normal(6, 1.5, 15)
    y_pos_0b = np.random.uniform(1, 3, 15)
    y_pos_1b = np.random.uniform(1, 3, 15)
    ax.scatter(c0b, y_pos_0b, c=ACCENT, s=30, marker='x', zorder=5)
    ax.scatter(c1b, y_pos_1b, c=RED_ACCENT, s=30, marker='x', zorder=5)
    ax.axvline(x=4.5, color=GRAY4, linewidth=1, linestyle=':', ymin=0, ymax=0.35)
    ax.text(5, 0.3, 'σ²_wewnątrz DUŻA → klasy mieszają się → zły próg',
            ha='center', fontsize=FS_SMALL, color=GRAY5)

    ax.legend(fontsize=FS_SMALL, loc='upper left')

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_otsu_bimodal.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print(" ✓ q23_otsu_bimodal.png")
|
||
|
||
|
||
# ============================================================
|
||
# 2. WATERSHED — Topographic flooding (not ASCII!)
|
||
# ============================================================
|
||
def generate_watershed():
    """Render the three-panel watershed segmentation figure.

    Panel 1 shows a 1-D intensity profile drawn as terrain with two valleys
    and a ridge, panel 2 shows the same terrain partially flooded, and
    panel 3 is a text panel about over-segmentation and marker-controlled
    watershed.  The figure is written to ``q23_watershed.png`` inside
    ``OUTPUT_DIR``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 3.8))

    x = np.linspace(0, 10, 200)
    # Create a surface with two valleys and a ridge
    surface = 3 * np.exp(-((x - 3)**2) / 1.5) + 4 * np.exp(-((x - 7)**2) / 1.2) + \
        0.5 * np.sin(x * 2) + 1
    # Invert: valleys at objects (dark), peaks at boundaries (bright)
    surface_inv = 6 - surface + 1

    # --- Panel 1: Image as topographic surface ---
    ax = axes[0]
    ax.fill_between(x, 0, surface_inv, color=GRAY2, alpha=0.7)
    ax.plot(x, surface_inv, color=BLACK, linewidth=1.5)

    # Mark valleys (indices 60/140/100 are the samples nearest x = 3, 7, 5)
    ax.annotate('Dolina 1\n(obiekt A)', xy=(3, surface_inv[60]), fontsize=FS_SMALL,
                ha='center', va='bottom',
                arrowprops=dict(arrowstyle='->', color=ACCENT), xytext=(1.5, 5.5))
    ax.annotate('Dolina 2\n(obiekt B)', xy=(7, surface_inv[140]), fontsize=FS_SMALL,
                ha='center', va='bottom',
                arrowprops=dict(arrowstyle='->', color=RED_ACCENT), xytext=(8.5, 5.5))
    # Mark ridge
    ax.annotate('Grań\n(granica)', xy=(5, surface_inv[100]), fontsize=FS_SMALL,
                ha='center', va='bottom',
                arrowprops=dict(arrowstyle='->', color=GREEN_ACCENT), xytext=(5, 6.5))

    ax.set_xlabel('Pozycja piksela', fontsize=FS)
    ax.set_ylabel('Jasność (= wysokość)', fontsize=FS)
    ax.set_title('Krok 1: obraz → teren', fontsize=FS_TITLE, fontweight='bold')
    ax.set_ylim(0, 7)

    # --- Panel 2: Flooding ---
    ax = axes[1]
    ax.fill_between(x, 0, surface_inv, color=GRAY2, alpha=0.7)
    ax.plot(x, surface_inv, color=BLACK, linewidth=1.5)

    # Water level
    water_level = 3.2

    # Fill each valley up to the water level, one color per basin.  Each mask
    # is computed once and reused for both x and the terrain height.
    basins = (
        ((x > 1) & (x < 5), ACCENT_LIGHT),  # valley 1
        ((x > 5) & (x < 9), '#FFCDD2'),     # valley 2
    )
    for in_basin, water_color in basins:
        basin_x = x[in_basin]
        basin_floor = surface_inv[in_basin]
        ax.fill_between(basin_x, basin_floor, water_level,
                        where=basin_floor < water_level,
                        color=water_color, alpha=0.6)

    ax.axhline(y=water_level, color=ACCENT, linewidth=1, linestyle='--', alpha=0.5)
    ax.text(3, 2.5, 'Woda A', fontsize=FS, ha='center', color=ACCENT, fontweight='bold')
    ax.text(7, 2.2, 'Woda B', fontsize=FS, ha='center', color=RED_ACCENT, fontweight='bold')
    ax.annotate('Tu się spotkają!\n→ GRANICA', xy=(5, surface_inv[100]), fontsize=FS_SMALL,
                ha='center', color=GREEN_ACCENT, fontweight='bold',
                arrowprops=dict(arrowstyle='->', color=GREEN_ACCENT), xytext=(5, 6.2))

    ax.set_xlabel('Pozycja piksela', fontsize=FS)
    ax.set_title('Krok 2: zalewanie', fontsize=FS_TITLE, fontweight='bold')
    ax.set_ylim(0, 7)

    # --- Panel 3: Result with problem ---
    ax = axes[2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Krok 3: wynik', fontsize=FS_TITLE, fontweight='bold')

    # Good result
    rect1 = FancyBboxPatch((0.5, 6), 3.5, 3.2, boxstyle="round,pad=0.1",
                           facecolor=ACCENT_LIGHT, edgecolor=BLACK, linewidth=1)
    ax.add_patch(rect1)
    ax.text(2.25, 8.8, 'Ideał: 2 segmenty', fontsize=FS, ha='center', fontweight='bold')
    ax.text(2.25, 7.5, 'Segment A Segment B', fontsize=FS_SMALL, ha='center')
    ax.text(2.25, 6.7, '(po marker-controlled)', fontsize=FS_SMALL, ha='center', color=GREEN_ACCENT)

    # Bad result (over-segmentation)
    rect2 = FancyBboxPatch((5.5, 6), 4, 3.2, boxstyle="round,pad=0.1",
                           facecolor='#FFCDD2', edgecolor=BLACK, linewidth=1)
    ax.add_patch(rect2)
    ax.text(7.5, 8.8, 'Problem: over-segmentation', fontsize=FS, ha='center', fontweight='bold',
            color=RED_ACCENT)
    ax.text(7.5, 7.8, '47 regionów zamiast 2!', fontsize=FS_SMALL, ha='center', color=RED_ACCENT)
    ax.text(7.5, 7.1, 'Każde mini-minimum', fontsize=FS_SMALL, ha='center')
    ax.text(7.5, 6.5, '→ osobna „dolina"', fontsize=FS_SMALL, ha='center')

    # Solution: markers
    rect3 = FancyBboxPatch((1, 0.5), 8, 4.5, boxstyle="round,pad=0.15",
                           facecolor=GRAY1, edgecolor=GREEN_ACCENT, linewidth=1.5)
    ax.add_patch(rect3)
    ax.text(5, 4.3, 'Rozwiązanie: Marker-controlled watershed', fontsize=FS,
            ha='center', fontweight='bold', color=GREEN_ACCENT)
    ax.text(5, 3.4, '1. Zaznacz ręcznie „seeds" (markery) w każdym obiekcie', fontsize=FS_SMALL, ha='center')
    ax.text(5, 2.7, '2. Zalewaj TYLKO od tych markerów (nie od wszystkich minimów)', fontsize=FS_SMALL, ha='center')
    ax.text(5, 2.0, '3. Eliminuje fałszywe doliny z szumu', fontsize=FS_SMALL, ha='center')
    ax.text(5, 1.2, 'Wynik: tyle segmentów, ile podano markerów', fontsize=FS_SMALL, ha='center',
            fontweight='bold')

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_watershed.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print(" ✓ q23_watershed.png")
|
||
|
||
|
||
# ============================================================
|
||
# 3. MEAN SHIFT — Kernel, density, feature space
|
||
# ============================================================
|
||
def generate_mean_shift():
    """Draw the Mean Shift explainer and save it as ``q23_mean_shift.png``.

    The figure has three panels: synthetic clusters in a 2-D feature space
    with their density maxima starred, a window (kernel) stepping towards
    one of those maxima, and a text panel comparing Mean Shift to K-means.
    """
    fig, panels = plt.subplots(1, 3, figsize=(11, 4))

    np.random.seed(42)
    # (center_x, spread_x, center_y, spread_y, point count) per cluster.
    # For every cluster x is sampled before y, one cluster after another.
    cluster_specs = [
        (2, 0.5, 2, 0.5, 40),
        (6, 0.6, 7, 0.5, 35),
        (8, 0.4, 3, 0.6, 25),
    ]
    clusters = []
    for cx, sx, cy, sy, count in cluster_specs:
        xs = np.random.normal(cx, sx, count)
        ys = np.random.normal(cy, sy, count)
        clusters.append((xs, ys))

    # --- Panel 1: feature space with the three clusters ---
    feature_ax = panels[0]
    for xs, ys in clusters:
        feature_ax.scatter(xs, ys, c=GRAY4, s=15, alpha=0.7, zorder=3)

    # Star each cluster center; only the first star gets a legend entry.
    for idx, (cx, _, cy, _, _) in enumerate(cluster_specs):
        legend_kw = {'label': 'Max gęstości'} if idx == 0 else {}
        feature_ax.scatter([cx], [cy], c=RED_ACCENT, s=80, marker='*', zorder=5, **legend_kw)

    feature_ax.set_xlabel('Cecha 1: jasność', fontsize=FS)
    feature_ax.set_ylabel('Cecha 2: pozycja x', fontsize=FS)
    feature_ax.set_title('Przestrzeń cech', fontsize=FS_TITLE, fontweight='bold')
    cluster_captions = [
        (2, 0.3, 'Klaster 1\n(ciemne, lewo)'),
        (6, 5.3, 'Klaster 2\n(jasne, prawo)'),
        (8, 1.3, 'Klaster 3\n(jasne, dół)'),
    ]
    for cap_x, cap_y, caption in cluster_captions:
        feature_ax.text(cap_x, cap_y, caption, ha='center', fontsize=FS_TINY, color=GRAY6)
    feature_ax.legend(fontsize=FS_SMALL, loc='upper left')

    # --- Panel 2: the window moving towards a density maximum ---
    kernel_ax = panels[1]
    for (xs, ys), tint in zip(clusters, (ACCENT_LIGHT, GRAY3, GRAY3)):
        kernel_ax.scatter(xs, ys, c=tint, s=15, alpha=0.7, zorder=3)

    # Successive window centers, from the start point to the converged one.
    path_x = [4.5, 3.8, 3.0, 2.3, 2.05]
    path_y = [4.0, 3.3, 2.7, 2.2, 2.03]
    final_step = len(path_x) - 1

    for step in range(len(path_x)):
        center = (path_x[step], path_y[step])
        is_final = step == final_step
        # Later windows are drawn more opaque; only the final one is solid.
        window = plt.Circle(center, 1.2, fill=False, edgecolor=ACCENT,
                            linewidth=1.5, linestyle='-' if is_final else '--',
                            alpha=0.3 + 0.15 * step)
        kernel_ax.add_patch(window)
        if not is_final:
            # Arrow from this center to the next one.
            kernel_ax.annotate('', xy=(path_x[step + 1], path_y[step + 1]),
                               xytext=center,
                               arrowprops=dict(arrowstyle='->', color=RED_ACCENT, lw=1.5))

    kernel_ax.scatter([path_x[0]], [path_y[0]], c=ACCENT, s=50, marker='o', zorder=5)
    kernel_ax.scatter([path_x[-1]], [path_y[-1]], c=RED_ACCENT, s=80, marker='*', zorder=5)

    kernel_ax.text(4.5, 5.2, 'Start: losowy\npiksel', fontsize=FS_SMALL, ha='center', color=ACCENT)
    kernel_ax.text(2.05, 0.5, 'Koniec: max\ngęstości', fontsize=FS_SMALL, ha='center', color=RED_ACCENT,
                   fontweight='bold')
    kernel_ax.text(7, 8, 'Okno (jądro)\nprzesuwa się\ndo skupiska', fontsize=FS_SMALL, ha='center',
                   color=GRAY6,
                   bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GRAY3))

    kernel_ax.set_xlabel('Cecha 1', fontsize=FS)
    kernel_ax.set_ylabel('Cecha 2', fontsize=FS)
    kernel_ax.set_title('Jądro → max gęstości', fontsize=FS_TITLE, fontweight='bold')
    kernel_ax.set_xlim(0, 10)
    kernel_ax.set_ylim(0, 9)

    # --- Panel 3: text notes on why no K is needed ---
    notes_ax = panels[2]
    notes_ax.set_xlim(0, 10)
    notes_ax.set_ylim(0, 10)
    notes_ax.axis('off')
    notes_ax.set_title('Dlaczego bez K?', fontsize=FS_TITLE, fontweight='bold')

    # (text, font size, color, weight); None marks a small vertical gap.
    notes = [
        ('K-means wymaga:', FS, RED_ACCENT, 'bold'),
        (' „Podaj K=3 klastry"', FS_SMALL, 'black', 'normal'),
        (' Problem: skąd wiesz ile klastrów?', FS_SMALL, GRAY5, 'normal'),
        None,
        ('Mean Shift NIE wymaga K:', FS, GREEN_ACCENT, 'bold'),
        (' Każdy piksel startuje → toczy się', FS_SMALL, 'black', 'normal'),
        (' → trafia do najbliższego szczytu', FS_SMALL, 'black', 'normal'),
        (' → ile szczytów = tyle segmentów', FS_SMALL, 'black', 'normal'),
        (' → automatycznie!', FS_SMALL, GREEN_ACCENT, 'bold'),
        None,
        ('Parametr: bandwidth (szerokość okna)', FS, 'black', 'bold'),
        (' Duże okno → mało segmentów', FS_SMALL, 'black', 'normal'),
        (' Małe okno → dużo segmentów', FS_SMALL, 'black', 'normal'),
        None,
        ('Okno = jądro (kernel):', FS, 'black', 'bold'),
        (' Koło o promieniu h wokół punktu.', FS_SMALL, 'black', 'normal'),
        (' Oblicz średnią pikseli W oknie.', FS_SMALL, 'black', 'normal'),
        (' Przesuń okno na tę średnią.', FS_SMALL, 'black', 'normal'),
        (' Powtórz aż się zatrzyma.', FS_SMALL, 'black', 'normal'),
    ]
    cursor_y = 9.0
    for note in notes:
        if note is None:
            cursor_y -= 0.2
            continue
        content, font_size, font_color, font_weight = note
        notes_ax.text(0.5, cursor_y, content, fontsize=font_size, color=font_color,
                      fontweight=font_weight, va='top')
        cursor_y -= 0.5

    plt.tight_layout()
    target = os.path.join(OUTPUT_DIR, 'q23_mean_shift.png')
    plt.savefig(target, dpi=DPI, bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_mean_shift.png")
|
||
|
||
|
||
# ============================================================
|
||
# 4. NORMALIZED CUTS — Graph cut visualization
|
||
# ============================================================
|
||
def _ncut_edge_style(value_a, value_b):
    """Return ``(linewidth, alpha)`` for the edge joining two pixel values.

    Similarity is 1 for identical values and falls linearly with the
    absolute difference (scaled by 255, floored at 0); more similar pixels
    get a thicker, more opaque line.
    """
    similarity = max(0, 1 - abs(value_a - value_b) / 255)
    return similarity * 2.5 + 0.3, similarity * 0.8 + 0.2


def generate_normalized_cuts():
    """Render the three-panel Normalized Cuts figure.

    Panel 1 draws a 4x4 pixel grid as a graph whose edge thickness encodes
    neighbour similarity, panel 2 sketches a cut between two node groups
    together with the Ncut formula, and panel 3 lists the algorithm steps
    as text.  The figure is written to ``q23_normalized_cuts.png`` inside
    ``OUTPUT_DIR``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 4))

    # --- Panel 1: Image as graph ---
    ax = axes[0]
    ax.set_xlim(-0.5, 4.5)
    ax.set_ylim(-0.5, 4.5)
    ax.set_aspect('equal')
    ax.set_title('Obraz → graf', fontsize=FS_TITLE, fontweight='bold')

    # Draw 4x4 pixel grid with colors
    pixel_vals = np.array([
        [30, 35, 180, 190],
        [40, 30, 185, 200],
        [170, 180, 40, 35],
        [190, 175, 30, 45],
    ])
    n_rows, n_cols = pixel_vals.shape
    for i in range(n_rows):
        for j in range(n_cols):
            v = pixel_vals[i, j]
            gray_val = v / 255.0
            # Row 0 is drawn at the top, hence the (3 - i) flip on the y axis.
            rect = patches.Rectangle((j - 0.4, 3 - i - 0.4), 0.8, 0.8,
                                     facecolor=(gray_val, gray_val, gray_val),
                                     edgecolor=BLACK, linewidth=0.8)
            ax.add_patch(rect)
            # White digits on dark cells, black digits on bright ones.
            text_color = 'white' if v < 100 else 'black'
            ax.text(j, 3 - i, str(v), ha='center', va='center', fontsize=FS_SMALL,
                    color=text_color, fontweight='bold')

    # Draw edges between adjacent pixels (right and bottom neighbour only,
    # so every edge is drawn exactly once)
    for i in range(n_rows):
        for j in range(n_cols):
            # Right neighbor
            if j < n_cols - 1:
                lw, alpha = _ncut_edge_style(pixel_vals[i, j], pixel_vals[i, j+1])
                ax.plot([j + 0.4, j + 0.6], [3 - i, 3 - i], color=GRAY5,
                        linewidth=lw, alpha=alpha)
            # Bottom neighbor
            if i < n_rows - 1:
                lw, alpha = _ncut_edge_style(pixel_vals[i, j], pixel_vals[i+1, j])
                ax.plot([j, j], [3 - i - 0.4, 3 - i - 0.6], color=GRAY5,
                        linewidth=lw, alpha=alpha)

    ax.text(2, -0.8, 'Grube linie = duże podobieństwo\n(silna krawędź grafu)',
            ha='center', fontsize=FS_TINY, color=GRAY5)
    ax.axis('off')

    # --- Panel 2: Cut concept ---
    ax = axes[1]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Cięcie grafu (graph cut)', fontsize=FS_TITLE, fontweight='bold')

    # Draw two groups of nodes
    positions_A = [(2, 7), (3, 8), (2, 5), (3, 6)]
    positions_B = [(7, 7), (8, 8), (7, 5), (8, 6)]

    # Intra-group edges (thick = similar): every pair within a group
    for group, edge_color in ((positions_A, ACCENT), (positions_B, RED_ACCENT)):
        for i, (x1, y1) in enumerate(group):
            for x2, y2 in group[i+1:]:
                ax.plot([x1, x2], [y1, y2], color=edge_color, linewidth=2, alpha=0.5)

    # Inter-group edges (thin = dissimilar) — these get cut
    cut_edges = [((3, 8), (7, 7)), ((3, 6), (7, 5)), ((2, 5), (7, 5))]
    for (x1, y1), (x2, y2) in cut_edges:
        ax.plot([x1, x2], [y1, y2], color=GRAY4, linewidth=0.8, linestyle='--')

    # Draw nodes
    for x, y in positions_A:
        ax.scatter(x, y, c=ACCENT, s=120, zorder=5, edgecolors=BLACK, linewidth=0.8)
    for x, y in positions_B:
        ax.scatter(x, y, c='#FFCDD2', s=120, zorder=5, edgecolors=BLACK, linewidth=0.8)

    # Cut line
    ax.plot([5, 5], [3.5, 9.5], color=RED_ACCENT, linewidth=2.5, linestyle='-',
            zorder=4)
    ax.text(5, 9.8, 'CIĘCIE', ha='center', fontsize=FS, fontweight='bold', color=RED_ACCENT)

    ax.text(2.5, 3.8, 'Segment A\n(ciemne piksele)', ha='center', fontsize=FS_SMALL, color=ACCENT)
    ax.text(7.5, 3.8, 'Segment B\n(jasne piksele)', ha='center', fontsize=FS_SMALL, color=RED_ACCENT)

    # Formula
    ax.text(5, 1.8, 'Ncut(A,B) = cut(A,B)/assoc(A,V)\n + cut(A,B)/assoc(B,V)',
            ha='center', fontsize=FS_SMALL, fontweight='bold',
            bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GRAY3))
    ax.text(5, 0.5, 'Minimalizuj Ncut → tnij SŁABE krawędzie\nzachowuj SILNE (wewnątrz grupy)',
            ha='center', fontsize=FS_TINY, color=GRAY5)

    # --- Panel 3: Algorithm summary ---
    ax = axes[2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Algorytm Normalized Cuts', fontsize=FS_TITLE, fontweight='bold')

    # (step heading, step description) pairs, rendered top to bottom.
    steps = [
        ('1. Zbuduj graf', 'Piksele = węzły\nKrawędzie = podobieństwo sąsiadów\n(kolor, jasność, odległość)'),
        ('2. Macierz podobieństwa W', 'W[i,j] = exp(-|kolori - kolorj|² / σ²)\n→ im podobniejsze, tym wyższa waga'),
        ('3. Macierz stopni D', 'D[i,i] = Σ W[i,j]\n(suma wszystkich wag z węzła i)'),
        ('4. Rozwiąż problem własny', '(D-W)·y = λ·D·y\n→ drugi najm. wektor własny y'),
        ('5. Podziel wg y', 'y[i] > 0 → segment A\ny[i] ≤ 0 → segment B'),
    ]

    y = 9.5
    for title, desc in steps:
        ax.text(0.5, y, title, fontsize=FS, fontweight='bold', va='top')
        y -= 0.4
        ax.text(0.8, y, desc, fontsize=FS_TINY, va='top', color=GRAY6)
        y -= 1.2

    ax.text(5, 0.3, 'Złożoność: O(n³) — wymaga eigen decomposition!',
            ha='center', fontsize=FS_SMALL, fontweight='bold', color=RED_ACCENT)

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_normalized_cuts.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print(" ✓ q23_normalized_cuts.png")
|
||
|
||
|
||
# ============================================================
|
||
# 5. RELU — Function plot
|
||
# ============================================================
|
||
def generate_relu():
    """Render the two-panel ReLU figure.

    Panel 1 plots ReLU(x) = max(0, x) on [-5, 5] with the inactive (x < 0)
    and active (x >= 0) regions tinted and annotated; panel 2 is a text
    panel with a worked neuron example.  The figure is written to
    ``q23_relu.png`` inside ``OUTPUT_DIR``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(8, 3.5))

    # --- Panel 1: ReLU plot ---
    ax = axes[0]
    x = np.linspace(-5, 5, 200)
    relu = np.maximum(0, x)
    ax.plot(x, relu, color=ACCENT, linewidth=2.5, label='ReLU(x) = max(0, x)')
    ax.axhline(y=0, color=GRAY3, linewidth=0.5)
    ax.axvline(x=0, color=GRAY3, linewidth=0.5)
    # Tint the inactive half-plane.  The curve is identically 0 for x < 0, so
    # filling "under the curve" there has zero height and is invisible; a
    # vertical span makes the red "off" region actually show up.
    ax.axvspan(x.min(), 0, color=RED_ACCENT, alpha=0.1)
    active = x >= 0
    ax.fill_between(x[active], 0, relu[active], color=ACCENT, alpha=0.1)

    # Annotations
    ax.annotate('x < 0 → output = 0\n(neuron „wyłączony")', xy=(-3, 0),
                fontsize=FS_SMALL, ha='center', va='bottom', color=RED_ACCENT,
                arrowprops=dict(arrowstyle='->', color=RED_ACCENT), xytext=(-3, 2))
    ax.annotate('x ≥ 0 → output = x\n(neuron „włączony")', xy=(3, 3),
                fontsize=FS_SMALL, ha='center', va='bottom', color=ACCENT,
                arrowprops=dict(arrowstyle='->', color=ACCENT), xytext=(3, 4.5))
    ax.scatter([0], [0], c=BLACK, s=40, zorder=5)
    ax.text(0.3, -0.5, '(0,0)', fontsize=FS_SMALL, color=GRAY5)
    ax.set_xlabel('x (wejście neuronu)', fontsize=FS)
    ax.set_ylabel('ReLU(x)', fontsize=FS)
    ax.set_title('ReLU — Rectified Linear Unit', fontsize=FS_TITLE, fontweight='bold')
    ax.legend(fontsize=FS_SMALL, loc='upper left')
    ax.set_ylim(-1, 6)
    ax.grid(True, alpha=0.2)

    # --- Panel 2: Why ReLU ---
    ax = axes[1]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Dlaczego ReLU?', fontsize=FS_TITLE, fontweight='bold')

    y = 9.0
    # (text, font size, color, weight); an empty text is a small vertical gap.
    lines = [
        ('Neuron oblicza:', FS, BLACK, 'bold'),
        (' z = w₁·x₁ + w₂·x₂ + ... + bias', FS_SMALL, BLACK, 'normal'),
        (' output = ReLU(z) = max(0, z)', FS_SMALL, ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Przykład:', FS, BLACK, 'bold'),
        (' wagi: w₁=0.5, w₂=-0.3, bias=0.1', FS_SMALL, BLACK, 'normal'),
        (' wejścia: x₁=2.0, x₂=4.0', FS_SMALL, BLACK, 'normal'),
        (' z = 0.5·2 + (-0.3)·4 + 0.1 = -0.1', FS_SMALL, BLACK, 'normal'),
        (' ReLU(-0.1) = max(0, -0.1) = 0', FS_SMALL, RED_ACCENT, 'bold'),
        (' → neuron milczy (wejście nieistotne)', FS_SMALL, GRAY5, 'normal'),
        ('', 0, '', ''),
        ('Gdyby z = 2.3:', FS, BLACK, 'bold'),
        (' ReLU(2.3) = max(0, 2.3) = 2.3', FS_SMALL, GREEN_ACCENT, 'bold'),
        (' → neuron aktywny! Przekazuje sygnał', FS_SMALL, GRAY5, 'normal'),
        ('', 0, '', ''),
        ('Szybsza niż sigmoid/tanh', FS_SMALL, GRAY5, 'normal'),
        ('(brak exp() → szybkie obliczenia)', FS_SMALL, GRAY5, 'normal'),
    ]
    for txt, size, color, weight in lines:
        if not txt:
            y -= 0.2
            continue
        ax.text(0.5, y, txt, fontsize=size, color=color, fontweight=weight, va='top')
        y -= 0.5

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_relu.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print(" ✓ q23_relu.png")
|
||
|
||
|
||
# ============================================================
|
||
# 6. DOT PRODUCT — Iloczyn skalarny visual
|
||
# ============================================================
|
||
def generate_dot_product():
    """Draw the dot-product explainer and save it as ``q23_dot_product.png``.

    Three panels: a text walkthrough of the definition with a numeric
    example, a 3x3 filter next to a 3x3 image patch with their element-wise
    product sum, and two 2-D vectors with the angle between them.
    """
    fig, panels = plt.subplots(1, 3, figsize=(11, 3.5))

    # --- Panel 1: definition and worked example as text ---
    concept_ax = panels[0]
    concept_ax.set_xlim(0, 10)
    concept_ax.set_ylim(0, 10)
    concept_ax.axis('off')
    concept_ax.set_title('Iloczyn skalarny\n(dot product)', fontsize=FS_TITLE, fontweight='bold')

    # (text, font size, color, weight); None marks a small vertical gap.
    rows = [
        ('Dwa wektory (listy liczb) → JEDNA liczba', FS, BLACK, 'bold'),
        None,
        ('a = [a₁, a₂, a₃] b = [b₁, b₂, b₃]', FS, ACCENT, 'normal'),
        None,
        ('a · b = a₁·b₁ + a₂·b₂ + a₃·b₃', FS, BLACK, 'bold'),
        None,
        ('Przykład:', FS, BLACK, 'bold'),
        ('a = [1, 3, -2] b = [4, -1, 5]', FS_SMALL, BLACK, 'normal'),
        ('a·b = 1·4 + 3·(-1) + (-2)·5', FS_SMALL, BLACK, 'normal'),
        (' = 4 + (-3) + (-10) = -9', FS_SMALL, RED_ACCENT, 'bold'),
        None,
        ('Duży wynik → wektory „podobne" (w tym samym kierunku)', FS_SMALL, GREEN_ACCENT, 'normal'),
        ('Mały/ujemny → wektory „różne"', FS_SMALL, RED_ACCENT, 'normal'),
    ]
    cursor_y = 8.5
    for row in rows:
        if row is None:
            cursor_y -= 0.25
            continue
        content, font_size, font_color, font_weight = row
        concept_ax.text(0.5, cursor_y, content, fontsize=font_size, color=font_color,
                        fontweight=font_weight, va='top')
        cursor_y -= 0.55

    # --- Panel 2: convolution as a dot product of filter and image patch ---
    conv_ax = panels[1]
    conv_ax.set_xlim(-0.5, 5.5)
    conv_ax.set_ylim(-0.5, 5.5)
    conv_ax.set_aspect('equal')
    conv_ax.set_title('Konwolucja = iloczyn skalarny\nfiltra × fragment obrazu', fontsize=FS_TITLE, fontweight='bold')

    filter_vals = [[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]]
    img_vals = [[50, 50, 200], [50, 50, 200], [50, 50, 200]]
    # Per grid: cell values, x offset of the cell's left edge, x offset of the
    # cell's label, fill color, then the caption's (x, text, color).
    grids = [
        (filter_vals, -0.4, 0, ACCENT_LIGHT, (1, 'Filtr', ACCENT)),
        (img_vals, 2.6, 3, GRAY2, (4, 'Fragment\nobrazu', GRAY5)),
    ]
    for cells, edge_dx, label_dx, fill, (cap_x, cap_text, cap_color) in grids:
        for row_idx, row_vals in enumerate(cells):
            for col_idx, cell in enumerate(row_vals):
                # Row 0 is the top row, so y decreases with the row index.
                box = patches.Rectangle((col_idx + edge_dx, 4 - row_idx - 0.4), 0.8, 0.8,
                                        facecolor=fill, edgecolor=BLACK, linewidth=0.8)
                conv_ax.add_patch(box)
                conv_ax.text(col_idx + label_dx, 4 - row_idx, str(cell), ha='center', va='center',
                             fontsize=FS, fontweight='bold')
        conv_ax.text(cap_x, 1.5, cap_text, ha='center', fontsize=FS, fontweight='bold', color=cap_color)

    conv_ax.text(2.5, 0.5, '(-1)·50 + 0·50 + 1·200 +\n(-1)·50 + 0·50 + 1·200 +\n(-1)·50 + 0·50 + 1·200\n= 450 (krawędź!)',
                 ha='center', fontsize=FS_TINY, fontweight='bold',
                 bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GREEN_ACCENT))

    conv_ax.axis('off')

    # --- Panel 3: two vectors and the angle between them ---
    vec_ax = panels[2]
    arrow_kw = dict(angles='xy', scale_units='xy', scale=1, width=0.025)
    vec_ax.quiver(0, 0, 3, 4, color=ACCENT, label='a = [3, 4]', **arrow_kw)
    vec_ax.quiver(0, 0, 4, 1, color=RED_ACCENT, label='b = [4, 1]', **arrow_kw)

    # Arc from the direction of b up to the direction of a.
    angle_b = np.arctan2(1, 4)
    angle_a = np.arctan2(4, 3)
    arc_angles = np.linspace(angle_b, angle_a, 30)
    arc_radius = 1.5
    vec_ax.plot(arc_radius * np.cos(arc_angles), arc_radius * np.sin(arc_angles),
                color=GREEN_ACCENT, linewidth=1.5)
    vec_ax.text(1.8, 1.3, 'θ', fontsize=FS, color=GREEN_ACCENT, fontweight='bold')

    vec_ax.text(3.2, 4.2, 'a', fontsize=FS, color=ACCENT, fontweight='bold')
    vec_ax.text(4.2, 1.2, 'b', fontsize=FS, color=RED_ACCENT, fontweight='bold')

    vec_ax.text(2.5, -1.0, 'a · b = |a|·|b|·cos(θ)\n= 3·4 + 4·1 = 16',
                ha='center', fontsize=FS_SMALL, fontweight='bold',
                bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GRAY3))
    vec_ax.text(2.5, -2.0, 'Mały kąt θ → duży dot product\n= wektory „zgadają się"',
                ha='center', fontsize=FS_TINY, color=GRAY5)

    vec_ax.set_xlim(-0.5, 5.5)
    vec_ax.set_ylim(-2.5, 5.5)
    vec_ax.set_aspect('equal')
    vec_ax.grid(True, alpha=0.2)
    vec_ax.legend(fontsize=FS_SMALL, loc='upper left')
    vec_ax.set_title('Geometrycznie: kąt', fontsize=FS_TITLE, fontweight='bold')

    plt.tight_layout()
    target = os.path.join(OUTPUT_DIR, 'q23_dot_product.png')
    plt.savefig(target, dpi=DPI, bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_dot_product.png")
|
||
|
||
|
||
# ============================================================
|
||
# 7. FCN — FC vs Conv 1x1, skip connections
|
||
# ============================================================
|
||
def generate_fcn():
    """Render the "FC vs Conv 1×1" figure and save it as a PNG.

    The top panel draws two left-to-right layer pipelines: a classic CNN that
    ends in fully connected layers, and an FCN that ends in a 1×1 convolution
    followed by upsampling. The bottom panel holds three explanatory cards.
    The result is written to ``q23_fc_vs_conv1x1.png`` inside ``OUTPUT_DIR``.
    """
    fig, (ax_flow, ax_notes) = plt.subplots(2, 1, figsize=(10, 7))

    def draw_pipeline(ax, stages, y_mid, box_h):
        """Draw one row of boxes joined by arrows.

        Each stage is a ``(x_center, label, width, fill)`` tuple; an arrow
        runs from the right edge of a box to the left edge of the next one.
        """
        following = stages[1:] + [None]
        for (cx, caption, width, fill), nxt in zip(stages, following):
            ax.add_patch(FancyBboxPatch(
                (cx - width/2, y_mid - box_h / 2), width, box_h,
                boxstyle="round,pad=0.05", facecolor=fill,
                edgecolor=BLACK, linewidth=0.8))
            ax.text(cx, y_mid, caption, ha='center', va='center', fontsize=FS_TINY)
            if nxt is not None:
                ax.annotate('', xy=(nxt[0] - nxt[2]/2, y_mid),
                            xytext=(cx + width/2, y_mid),
                            arrowprops=dict(arrowstyle='->', color=GRAY5, lw=1))

    def draw_card(ax, origin, width, height, fill, edge):
        """Draw the rounded background rectangle of an explanation card."""
        ax.add_patch(FancyBboxPatch(origin, width, height, boxstyle="round,pad=0.15",
                                    facecolor=fill, edgecolor=edge, linewidth=1))

    # --- Panel 1: FC vs Conv 1x1 ---
    ax_flow.set_xlim(0, 20)
    ax_flow.set_ylim(0, 6)
    ax_flow.axis('off')
    ax_flow.set_title('FC (Fully Connected) vs Conv 1×1', fontsize=FS_TITLE, fontweight='bold')

    # Classic CNN that ends in fully connected layers (upper row).
    cnn_y = 4.5
    cnn_stages = [
        (1.5, 'Obraz\n224×224×3', 2.2, GRAY2),
        (4.5, 'Conv+Pool\n112×112×64', 1.8, GRAY2),
        (7.5, 'Conv+Pool\n7×7×512', 1.0, GRAY2),
        (10, 'Flatten\n25088', 0.5, ACCENT_LIGHT),
        (12, 'FC\n4096', 0.5, ACCENT_LIGHT),
        (14, 'FC\n1000', 0.3, ACCENT_LIGHT),
        (16, '"Kot"', 0.3, '#FFCDD2'),
    ]
    draw_pipeline(ax_flow, cnn_stages, cnn_y, 1.2)

    ax_flow.text(0.3, cnn_y, 'CNN:', fontsize=FS, fontweight='bold', color=RED_ACCENT, va='center')
    ax_flow.text(12, cnn_y + 1, 'PROBLEM: FC wymaga\nSTAŁEGO rozmiaru\n(np. 224×224)',
                 ha='center', fontsize=FS_SMALL, color=RED_ACCENT, fontweight='bold',
                 bbox=dict(boxstyle='round', facecolor='#FFCDD2', edgecolor=RED_ACCENT, alpha=0.3))

    # Fully convolutional network with a 1x1 convolution head (lower row).
    fcn_y = 1.5
    fcn_stages = [
        (1.5, 'Obraz\nH×W×3', 2.2, GRAY2),
        (4.5, 'Conv+Pool\nH/2 × W/2\n×64', 1.8, GRAY2),
        (7.5, 'Conv+Pool\nH/32 × W/32\n×512', 1.0, GRAY2),
        (10.5, 'Conv 1×1\nH/32 × W/32\n×C', 0.8, '#C8E6C9'),
        (13.5, 'Upsample\nH×W×C', 1.8, '#C8E6C9'),
        (16.5, 'Mapa\nsegmentacji', 1.5, '#C8E6C9'),
    ]
    draw_pipeline(ax_flow, fcn_stages, fcn_y, 1.4)

    ax_flow.text(0.3, fcn_y, 'FCN:', fontsize=FS, fontweight='bold', color=GREEN_ACCENT, va='center')
    ax_flow.text(10.5, fcn_y + 1.2, 'Conv 1×1:\nkażdy piksel\nosobno × wagi\n(jak FC ale\nzachowuje H×W)',
                 ha='center', fontsize=FS_TINY, color=GREEN_ACCENT,
                 bbox=dict(boxstyle='round', facecolor='#C8E6C9', edgecolor=GREEN_ACCENT, alpha=0.3))

    # --- Panel 2: What FC and Conv do ---
    ax_notes.set_xlim(0, 20)
    ax_notes.set_ylim(0, 6)
    ax_notes.axis('off')
    ax_notes.set_title('Co robi warstwa FC? Co robi konwolucja?', fontsize=FS_TITLE, fontweight='bold')

    # Card: fully connected layer.
    draw_card(ax_notes, (0.3, 3.2), 9, 2.5, ACCENT_LIGHT, ACCENT)
    ax_notes.text(4.8, 5.2, 'Fully Connected (FC)', fontsize=FS, fontweight='bold', ha='center')
    ax_notes.text(4.8, 4.5, 'KAŻDY neuron połączony z KAŻDYM wejściem\n'
                  '25 088 wejść × 4 096 neuronów = ~103 MLN wag!\n'
                  'Traci informację GDZIE (przestrzenną)\n'
                  'Wymaga STAŁEGO rozmiaru wejścia',
                  fontsize=FS_TINY, ha='center', va='top')

    # Card: convolution.
    draw_card(ax_notes, (10.3, 3.2), 9, 2.5, '#C8E6C9', GREEN_ACCENT)
    ax_notes.text(14.8, 5.2, 'Konwolucja (Conv)', fontsize=FS, fontweight='bold', ha='center')
    ax_notes.text(14.8, 4.5, 'Filtr (np. 3×3) „jedzie" po obrazie\n'
                  'Te same wagi dla KAŻDEJ pozycji\n'
                  'Zachowuje informację GDZIE\n'
                  'Akceptuje DOWOLNY rozmiar wejścia',
                  fontsize=FS_TINY, ha='center', va='top')

    # Card: 1x1 convolution.
    draw_card(ax_notes, (3, 0.3), 14, 2.2, GRAY1, BLACK)
    ax_notes.text(10, 2.1, 'Conv 1×1 = „FC per piksel"', fontsize=FS, fontweight='bold', ha='center')
    ax_notes.text(10, 1.5, 'Filtr 1×1: patrzy na JEDEN piksel, ale WSZYSTKIE kanały (512→C klas)\n'
                  'Działa jak FC ale zachowuje mapę H×W → każdy piksel osobno klasyfikowany\n'
                  'FCN: zamień FC na Conv1×1 → koniec z wymogiem stałego rozmiaru!',
                  fontsize=FS_TINY, ha='center', va='top')

    plt.tight_layout()
    out_path = os.path.join(OUTPUT_DIR, 'q23_fc_vs_conv1x1.png')
    plt.savefig(out_path, dpi=DPI, bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_fc_vs_conv1x1.png")
|
||
|
||
|
||
# ============================================================
|
||
# 8. U-NET ARCHITECTURE — Proper U-shaped diagram
|
||
# ============================================================
|
||
def generate_unet():
    """Render the U-shaped U-Net architecture diagram and save it as a PNG.

    Encoder blocks are stacked on the left, decoder blocks on the right, and
    the bottleneck sits at the bottom between them. Dashed horizontal arrows
    mark the skip connections between blocks drawn at the same height. The
    result is written to ``q23_unet_arch.png`` inside ``OUTPUT_DIR``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    ax.set_xlim(-1, 21)
    ax.set_ylim(-1, 12)
    ax.axis('off')
    ax.set_title('U-Net: architektura w kształcie litery U', fontsize=FS_TITLE + 1, fontweight='bold')

    # Every block is (x_center, y_center, width, height, label, channels).
    # Encoder layers (going DOWN on the left)
    encoder_layers = [
        (2, 10, 2.5, 1.5, '572×572×1\n(wejście)', 64),
        (2, 7.5, 2.2, 1.3, '284×284\n×64', 64),
        (2, 5, 1.8, 1.1, '140×140\n×128', 128),
        (2, 2.5, 1.5, 1.0, '68×68\n×256', 256),
    ]

    # Bottleneck
    bottleneck = (8, 0.5, 2.5, 1.2, '32×32×512\n(bottleneck)', 512)

    # Decoder layers (going UP on the right)
    decoder_layers = [
        (14, 2.5, 1.5, 1.0, '68×68\n×256', 256),
        (14, 5, 1.8, 1.1, '140×140\n×128', 128),
        (14, 7.5, 2.2, 1.3, '284×284\n×64', 64),
        (14, 10, 2.5, 1.5, '572×572×C\n(mapa seg.)', 'C'),
    ]

    def draw_block(ax, x, y, w, h, label, color):
        """Draw a rounded box centred on (x, y) with its label in the middle."""
        ax.add_patch(FancyBboxPatch((x - w/2, y - h/2), w, h,
                                    boxstyle="round,pad=0.05", facecolor=color,
                                    edgecolor=BLACK, linewidth=1.2))
        ax.text(x, y, label, ha='center', va='center', fontsize=FS_TINY)

    # Draw encoder
    for bx, by, bw, bh, text, _channels in encoder_layers:
        draw_block(ax, bx, by, bw, bh, text, ACCENT_LIGHT)

    # Downward arrows: bottom edge of one encoder block to the top of the next.
    for upper, lower in zip(encoder_layers, encoder_layers[1:]):
        x1, y1 = upper[0], upper[1] - upper[3]/2
        x2, y2 = lower[0], lower[1] + lower[3]/2
        ax.annotate('', xy=(x2, y2), xytext=(x1, y1),
                    arrowprops=dict(arrowstyle='->', color=ACCENT, lw=2))
        ax.text(x1 - 1.7, (y1 + y2) / 2, 'MaxPool\n2×2\n↓ zmniejsz', fontsize=FS_TINY,
                ha='center', color=ACCENT, fontweight='bold')

    # Encoder to bottleneck
    deepest = encoder_layers[-1]
    x1, y1 = deepest[0], deepest[1] - deepest[3]/2
    draw_block(ax, bottleneck[0], bottleneck[1], bottleneck[2], bottleneck[3],
               bottleneck[4], GRAY2)
    ax.annotate('', xy=(bottleneck[0] - bottleneck[2]/2, bottleneck[1] + bottleneck[3]/2),
                xytext=(x1, y1),
                arrowprops=dict(arrowstyle='->', color=ACCENT, lw=2))

    # Bottleneck to decoder
    first_dec = decoder_layers[0]
    ax.annotate('', xy=(first_dec[0] - first_dec[2]/2,
                        first_dec[1] - first_dec[3]/2),
                xytext=(bottleneck[0] + bottleneck[2]/2, bottleneck[1] + bottleneck[3]/2),
                arrowprops=dict(arrowstyle='->', color=RED_ACCENT, lw=2))

    # Draw decoder; the final output block ('C' channels) gets a darker green.
    for bx, by, bw, bh, text, channels in decoder_layers:
        fill = '#C8E6C9' if channels != 'C' else '#A5D6A7'
        draw_block(ax, bx, by, bw, bh, text, fill)

    # Upward arrows: top edge of one decoder block to the bottom of the next.
    for lower, upper in zip(decoder_layers, decoder_layers[1:]):
        x1, y1 = lower[0], lower[1] + lower[3]/2
        x2, y2 = upper[0], upper[1] - upper[3]/2
        ax.annotate('', xy=(x2, y2), xytext=(x1, y1),
                    arrowprops=dict(arrowstyle='->', color=GREEN_ACCENT, lw=2))
        ax.text(x1 + 2, (y1 + y2) / 2, 'UpConv\n2×2\n↑ zwiększ', fontsize=FS_TINY,
                ha='center', color=GREEN_ACCENT, fontweight='bold')

    # Skip connections: the decoder list runs bottom-to-top, so reversing it
    # pairs each encoder block with the decoder block at the same height.
    for enc, dec in zip(encoder_layers, reversed(decoder_layers)):
        ax.annotate('', xy=(dec[0] - dec[2]/2, dec[1]),
                    xytext=(enc[0] + enc[2]/2, enc[1]),
                    arrowprops=dict(arrowstyle='->', color=GRAY5, lw=1.5,
                                    linestyle='dashed'))
        mid_x = (enc[0] + enc[2]/2 + dec[0] - dec[2]/2) / 2
        ax.text(mid_x, enc[1] + 0.6, 'skip\n(concat)', fontsize=FS_TINY,
                ha='center', color=GRAY5, fontweight='bold')

    # Labels
    ax.text(0, 11.5, 'ENCODER\n(↓ zmniejsza)', fontsize=FS, fontweight='bold', color=ACCENT,
            ha='center')
    ax.text(17, 11.5, 'DECODER\n(↑ zwiększa)', fontsize=FS, fontweight='bold', color=GREEN_ACCENT,
            ha='center')
    ax.text(8, -0.8, 'Kształt litery „U": encoder schodzi ↓ → bottleneck na dnie → decoder wraca ↑',
            fontsize=FS_SMALL, ha='center', color=GRAY5, fontweight='bold')

    # Concatenation explanation
    ax.add_patch(FancyBboxPatch((17.5, 3), 3, 5, boxstyle="round,pad=0.15",
                                facecolor=GRAY1, edgecolor=GRAY5, linewidth=1, linestyle='--'))
    ax.text(19, 7.5, 'Concatenation:', fontsize=FS_SMALL, ha='center', fontweight='bold')
    ax.text(19, 6.5, 'Encoder: 64 kanały\nDecoder: 64 kanały\n→ concat → 128 kanałów\n\n'
            'Jak sklejenie\ndwóch stosów\nkart:', fontsize=FS_TINY, ha='center')
    ax.text(19, 3.7, '[enc₁|enc₂|...|dec₁|dec₂|...]', fontsize=FS_TINY - 1, ha='center',
            fontweight='bold', color=ACCENT)

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_unet_arch.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print(" ✓ q23_unet_arch.png")
|
||
|
||
|
||
# ============================================================
|
||
# 9. RECEPTIVE FIELD — with dilation
|
||
# ============================================================
|
||
def generate_receptive_field():
    """Render the receptive-field figure (plain vs dilated 3×3) and save it.

    Panels one and two each show a 7×7 grid with the cells sampled by a 3×3
    filter highlighted (dilation rate 1 and rate 2). Panel three is a column
    of explanatory text. The result is written to
    ``q23_receptive_field.png`` inside ``OUTPUT_DIR``.
    """
    fig, (ax_plain, ax_dilated, ax_notes) = plt.subplots(1, 3, figsize=(11, 4))

    def draw_grid(ax, size, highlight_cells, highlight_color, title, grid_offset=(0, 0)):
        """Draw a size×size grid of unit cells; (row, col) 0,0 is the top-left cell."""
        off_x, off_y = grid_offset
        for row in range(size):
            for col in range(size):
                fill = highlight_color if (row, col) in highlight_cells else WHITE
                ax.add_patch(patches.Rectangle((off_x + col, off_y + size - 1 - row), 1, 1,
                                               facecolor=fill, edgecolor=GRAY4, linewidth=0.5))
        ax.set_title(title, fontsize=FS_TITLE, fontweight='bold')

    def prepare_grid_axes(ax):
        """Apply the shared limits and aspect used by both grid panels."""
        ax.set_xlim(-0.5, 7.5)
        ax.set_ylim(-1, 8)
        ax.set_aspect('equal')
        ax.axis('off')

    # --- Panel 1: Standard 3x3 conv receptive field ---
    prepare_grid_axes(ax_plain)
    # Contiguous 3x3 window in the middle of the 7x7 input grid.
    highlight_3x3 = [(row, col) for row in (2, 3, 4) for col in (2, 3, 4)]
    draw_grid(ax_plain, 7, highlight_3x3, ACCENT_LIGHT, 'Zwykła conv 3×3')
    ax_plain.text(3.5, -0.5, 'RF = 3×3 pikseli', fontsize=FS, ha='center',
                  fontweight='bold', color=ACCENT)

    # --- Panel 2: Dilated conv (rate=2) ---
    prepare_grid_axes(ax_dilated)
    # Same nine taps, but sampled on every second row and column.
    highlight_dilated = [(row, col) for row in (1, 3, 5) for col in (1, 3, 5)]
    draw_grid(ax_dilated, 7, highlight_dilated, '#FFCDD2', 'Dilated conv 3×3\n(rate=2)')
    ax_dilated.text(3.5, -0.5, 'RF = 5×5, ale 9 parametrów!', fontsize=FS, ha='center',
                    fontweight='bold', color=RED_ACCENT)

    # One dot at the centre of every sampled cell to make the pattern visible.
    dots_x = [col + 0.5 for _row, col in highlight_dilated]
    dots_y = [6.5 - row for row, _col in highlight_dilated]
    ax_dilated.scatter(dots_x, dots_y, c=RED_ACCENT, s=30, zorder=5)

    # --- Panel 3: Comparison ---
    ax_notes.set_xlim(0, 10)
    ax_notes.set_ylim(0, 10)
    ax_notes.axis('off')
    ax_notes.set_title('Receptive Field\n(pole widzenia neuronu)', fontsize=FS_TITLE, fontweight='bold')

    # Each entry is (text, font size, colour, weight); an empty text is a spacer.
    lines = [
        ('RF = ile pikseli WEJŚCIOWYCH', FS, BLACK, 'bold'),
        ('wpływa na JEDEN piksel wyjścia', FS, BLACK, 'bold'),
        ('', 0, '', ''),
        ('Rate (współczynnik dylatacji):', FS, BLACK, 'bold'),
        (' rate=1: filtr „dotyka" sąsiadów', FS_SMALL, BLACK, 'normal'),
        (' rate=2: co drugi piksel → RF = 5×5', FS_SMALL, BLACK, 'normal'),
        (' rate=3: co trzeci → RF = 7×7', FS_SMALL, BLACK, 'normal'),
        (' WIĘCEJ kontekstu, TE SAME wagi!', FS_SMALL, GREEN_ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Dlaczego ważne w segmentacji?', FS, BLACK, 'bold'),
        (' Piksel sam nie wie czym jest.', FS_SMALL, BLACK, 'normal'),
        (' Potrzebuje KONTEKSTU (otoczenia).', FS_SMALL, BLACK, 'normal'),
        (' Większe RF → widzi obok budynki', FS_SMALL, BLACK, 'normal'),
        (' → wie, że TEN piksel to „droga"', FS_SMALL, GREEN_ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Global Average Pooling:', FS, BLACK, 'bold'),
        (' Mapa H×W×C → 1×1×C', FS_SMALL, BLACK, 'normal'),
        (' Średnia z CAŁEGO feature map', FS_SMALL, BLACK, 'normal'),
        (' RF = nieskończone (cały obraz)', FS_SMALL, GREEN_ACCENT, 'bold'),
    ]
    cursor_y = 8.5
    for text, font_size, text_color, weight in lines:
        if text != '':
            ax_notes.text(0.5, cursor_y, text, fontsize=font_size, color=text_color,
                          fontweight=weight, va='top')
            cursor_y -= 0.45
        else:
            # Spacer rows only add a small vertical gap.
            cursor_y -= 0.2

    plt.tight_layout()
    target = os.path.join(OUTPUT_DIR, 'q23_receptive_field.png')
    plt.savefig(target, dpi=DPI, bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_receptive_field.png")
|
||
|
||
|
||
# ============================================================
|
||
# 10. TRANSFORMER / Self-attention / SOTA
|
||
# ============================================================
|
||
def generate_transformer():
    """Render the "CNN local vs Transformer global" figure and save it.

    Panel one shows an 8×8 pixel grid where only the 3×3 neighbourhood of the
    queried pixel ("?") is highlighted; panel two highlights the whole grid.
    Panel three is a column of text on transformers, self-attention and SOTA.
    The result is written to ``q23_transformer_attention.png`` inside
    ``OUTPUT_DIR``.
    """
    fig, (ax_cnn, ax_attn, ax_notes) = plt.subplots(1, 3, figsize=(11, 4))

    grid_n = 8
    # Queried pixel in grid (row, col) indices. Row 0 is drawn at the top, so
    # grid row r sits at plot y = grid_n - 1 - r; (3, 4) therefore lands on the
    # unit cell whose lower-left corner is (4, 4).
    focus_row, focus_col = 3, 4
    focus_xy = (focus_col, grid_n - 1 - focus_row)

    def prepare_grid_axes(ax, title):
        """Apply the limits, aspect and title shared by both grid panels."""
        ax.set_xlim(-0.5, 8.5)
        ax.set_ylim(-1.5, 8.5)
        ax.set_aspect('equal')
        ax.axis('off')
        ax.set_title(title, fontsize=FS_TITLE, fontweight='bold')

    def draw_cells(ax, fill_for):
        """Draw the grid; ``fill_for(row, col)`` returns each cell's colour."""
        for row in range(grid_n):
            for col in range(grid_n):
                ax.add_patch(patches.Rectangle((col, grid_n - 1 - row), 1, 1,
                                               facecolor=fill_for(row, col),
                                               edgecolor=GRAY3, linewidth=0.3))

    def mark_focus(ax, alpha):
        """Overlay the red "?" cell that represents the pixel being classified."""
        ax.add_patch(patches.Rectangle(focus_xy, 1, 1, facecolor=RED_ACCENT,
                                       edgecolor=BLACK, linewidth=1.5, alpha=alpha))
        ax.text(focus_xy[0] + 0.5, focus_xy[1] + 0.5, '?', ha='center', va='center',
                fontsize=FS, fontweight='bold', color=WHITE)

    # --- Panel 1: CNN local ---
    prepare_grid_axes(ax_cnn, 'CNN: widzi LOKALNIE')

    def local_fill(row, col):
        # The 3x3 window is derived from the focus cell so that the "?" pixel
        # is its centre. A fixed rows-3..5 window would put "?" in the top row
        # of the window because of the vertical flip.
        in_window = abs(row - focus_row) <= 1 and abs(col - focus_col) <= 1
        return ACCENT_LIGHT if in_window else WHITE

    draw_cells(ax_cnn, local_fill)
    mark_focus(ax_cnn, 0.7)
    ax_cnn.text(4.5, -0.8, 'Filtr 3×3 widzi tylko\n9 sąsiednich pikseli', fontsize=FS_SMALL,
                ha='center', color=ACCENT)

    # --- Panel 2: Transformer global ---
    prepare_grid_axes(ax_attn, 'Transformer: widzi GLOBALNIE')
    # Every cell is highlighted: attention reaches the whole grid.
    draw_cells(ax_attn, lambda row, col: '#FFCDD2')
    mark_focus(ax_attn, 0.9)
    ax_attn.text(4.5, -0.8, 'Self-attention „pyta"\nALL 64 piksele naraz', fontsize=FS_SMALL,
                 ha='center', color=RED_ACCENT)

    # --- Panel 3: SOTA + Transformer explanation ---
    ax_notes.set_xlim(0, 10)
    ax_notes.set_ylim(0, 10)
    ax_notes.axis('off')
    ax_notes.set_title('Transformer & SOTA', fontsize=FS_TITLE, fontweight='bold')

    # Each entry is (text, font size, colour, weight); an empty text is a spacer.
    lines = [
        ('Transformer:', FS, BLACK, 'bold'),
        (' Architektura z 2017 (Vaswani et al.)', FS_SMALL, BLACK, 'normal'),
        (' Oryginalnie do NLP (tłumaczenie)', FS_SMALL, BLACK, 'normal'),
        (' Kluczowy mechanizm: SELF-ATTENTION', FS_SMALL, ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Self-attention w skrócie:', FS, BLACK, 'bold'),
        (' Każdy piksel tworzy trzy wektory:', FS_SMALL, BLACK, 'normal'),
        (' Q (Query — „czego szukam?")', FS_SMALL, ACCENT, 'normal'),
        (' K (Key — „co oferuję innych")', FS_SMALL, RED_ACCENT, 'normal'),
        (' V (Value — „moja wartość")', FS_SMALL, GREEN_ACCENT, 'normal'),
        (' Attention = softmax(Q·Kᵀ/√d)·V', FS_SMALL, BLACK, 'bold'),
        (' Koszt: O(n²) — n=liczba pikseli', FS_SMALL, RED_ACCENT, 'normal'),
        ('', 0, '', ''),
        ('SOTA = State Of The Art:', FS, BLACK, 'bold'),
        (' Najlepszy znany wynik na benchmarku', FS_SMALL, BLACK, 'normal'),
        (' Np. „mIoU 85.1% na ADE20K = SOTA"', FS_SMALL, BLACK, 'normal'),
        (' Ciągle się zmienia (nowy paper', FS_SMALL, GRAY5, 'normal'),
        (' → nowy SOTA)', FS_SMALL, GRAY5, 'normal'),
    ]
    y = 9.2
    for txt, size, color, weight in lines:
        if txt == '':
            # Spacer rows only add a small vertical gap.
            y -= 0.15
            continue
        ax_notes.text(0.3, y, txt, fontsize=size, color=color, fontweight=weight, va='top')
        y -= 0.45

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_transformer_attention.png'), dpi=DPI,
                bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_transformer_attention.png")
|
||
|
||
|
||
# ============================================================
|
||
# 11. REGION GROWING — seed selection + BFS
|
||
# ============================================================
|
||
def generate_region_growing():
    """Render the region-growing figure (seed choice, result, BFS waves).

    Panel one is a text column comparing manual and automatic seed selection.
    Panel two shows a 6×6 grid of intensities with the seed cell and the
    cells marked as region. Panel three shows the first BFS waves spreading
    from the seed. The result is written to ``q23_region_growing.png`` inside
    ``OUTPUT_DIR``.
    """
    fig, (ax_notes, ax_grid, ax_bfs) = plt.subplots(1, 3, figsize=(11, 4.2))

    grid_n = 6
    # Seed cell as (row, col); row 0 is drawn at the top of the grid, so the
    # centre of cell (r, c) is at plot coordinates (c + 0.5, grid_n - 1 - r + 0.5).
    seed_row, seed_col = 1, 1
    seed_x, seed_y = seed_col + 0.5, grid_n - 1 - seed_row + 0.5

    # --- Panel 1: Manual vs automatic seed ---
    ax_notes.set_xlim(0, 10)
    ax_notes.set_ylim(0, 10)
    ax_notes.axis('off')
    ax_notes.set_title('Seed: ręcznie vs automatycznie', fontsize=FS_TITLE, fontweight='bold')

    # Each entry is (text, font size, colour, weight); an empty text is a spacer.
    lines = [
        ('Ręczny seed:', FS, ACCENT, 'bold'),
        (' Użytkownik klika na obraz', FS_SMALL, BLACK, 'normal'),
        (' → „tu jest obiekt, od tego zacznij"', FS_SMALL, BLACK, 'normal'),
        (' Użycie: segmentacja interaktywna', FS_SMALL, GRAY5, 'normal'),
        (' (np. Photoshop — magic wand tool)', FS_SMALL, GRAY5, 'normal'),
        ('', 0, '', ''),
        ('Automatyczny seed:', FS, RED_ACCENT, 'bold'),
        (' 1. Histogram → lokalne maxima', FS_SMALL, BLACK, 'normal'),
        (' (najczęstsza jasność → seed)', FS_SMALL, GRAY5, 'normal'),
        (' 2. Grid: siatka co N pikseli', FS_SMALL, BLACK, 'normal'),
        (' (np. seed co 50 px → 100 seedów)', FS_SMALL, GRAY5, 'normal'),
        (' 3. Losowe próbkowanie', FS_SMALL, BLACK, 'normal'),
        (' 4. Ekstrema lokalne gradientu', FS_SMALL, BLACK, 'normal'),
        ('', 0, '', ''),
        ('Dlaczego OR?', FS, GREEN_ACCENT, 'bold'),
        (' Ręczny → precyzyjny, ale wolny', FS_SMALL, BLACK, 'normal'),
        (' Auto → szybki, ale over-segmentation', FS_SMALL, BLACK, 'normal'),
    ]
    y = 9.2
    for txt, size, color, weight in lines:
        if txt == '':
            # Spacer rows only add a small vertical gap.
            y -= 0.15
            continue
        ax_notes.text(0.3, y, txt, fontsize=size, color=color, fontweight=weight, va='top')
        y -= 0.45

    # --- Panel 2: Region growing step by step ---
    ax_grid.set_xlim(-0.5, 6.5)
    ax_grid.set_ylim(-1.5, 7.5)
    ax_grid.set_aspect('equal')
    ax_grid.axis('off')
    ax_grid.set_title('Region Growing: krok po kroku', fontsize=FS_TITLE, fontweight='bold')

    # 6x6 grid with values
    pixel_grid = np.array([
        [150, 153, 148, 200, 210, 205],
        [147, 155, 152, 195, 208, 200],
        [145, 148, 160, 190, 195, 210],
        [200, 195, 190, 155, 148, 150],
        [210, 205, 200, 150, 152, 145],
        [215, 208, 195, 148, 147, 155],
    ])

    # Cells within 20 intensity levels of the seed at (row 1, col 1), value 155.
    region_mask = np.array([
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ])

    for i in range(grid_n):
        for j in range(grid_n):
            v = pixel_grid[i, j]
            if region_mask[i, j] == 1 and v < 170:
                color = ACCENT_LIGHT
            elif region_mask[i, j] == 1:
                color = GRAY2
            else:
                color = WHITE
            if (i, j) == (seed_row, seed_col):
                color = '#FFD54F'  # Seed overrides the region colour
            ax_grid.add_patch(patches.Rectangle((j, grid_n - 1 - i), 1, 1,
                                                facecolor=color, edgecolor=GRAY4, linewidth=0.5))
            ax_grid.text(j + 0.5, grid_n - 1 - i + 0.5, str(v), ha='center', va='center',
                         fontsize=FS_TINY, fontweight='bold')

    # Mark seed
    ax_grid.annotate('SEED\n(155)', xy=(seed_x, seed_y), fontsize=FS_SMALL,
                     ha='center', color=RED_ACCENT, fontweight='bold',
                     arrowprops=dict(arrowstyle='->', color=RED_ACCENT), xytext=(-0.5, 7))

    ax_grid.text(3, -0.8, 'Próg = 20\nNiebieski = region (|val - seed| < 20)',
                 fontsize=FS_TINY, ha='center', color=ACCENT)

    # --- Panel 3: BFS expansion ---
    ax_bfs.set_xlim(-0.5, 6.5)
    ax_bfs.set_ylim(-1.5, 7.5)
    ax_bfs.set_aspect('equal')
    ax_bfs.axis('off')
    ax_bfs.set_title('Rosnący region (BFS)', fontsize=FS_TITLE, fontweight='bold')

    # Expansion waves as (legend label, colour, cells reached in that wave).
    waves = [
        ('Seed', '#FFD54F', [(seed_row, seed_col)]),
        ('Fala 1', '#FFF176', [(0, 1), (1, 0), (1, 2), (2, 1)]),
        ('Fala 2', '#FFF9C4', [(0, 0), (0, 2), (2, 0), (2, 2)]),
    ]
    # The waves are disjoint, so a flat cell -> colour lookup is unambiguous.
    wave_color_of = {cell: wave_color for _label, wave_color, cells in waves for cell in cells}

    for i in range(grid_n):
        for j in range(grid_n):
            ax_bfs.add_patch(patches.Rectangle((j, grid_n - 1 - i), 1, 1,
                                               facecolor=wave_color_of.get((i, j), WHITE),
                                               edgecolor=GRAY4, linewidth=0.5))

    # Draw BFS arrows from the seed towards its four direct neighbours.
    for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
        ax_bfs.annotate('', xy=(seed_x + dx * 0.7, seed_y + dy * 0.7),
                        xytext=(seed_x, seed_y),
                        arrowprops=dict(arrowstyle='->', color=RED_ACCENT, lw=1.2))

    ax_bfs.text(3, -0.5, 'BFS: sprawdzaj sąsiadów,\ndodawaj podobne do kolejki',
                fontsize=FS_TINY, ha='center', color=GRAY5)

    # Legend
    for w_idx, (label, wave_color, _cells) in enumerate(waves):
        ax_bfs.add_patch(patches.Rectangle((4, 6.5 - w_idx * 0.7), 0.5, 0.5,
                                           facecolor=wave_color, edgecolor=GRAY4, linewidth=0.5))
        ax_bfs.text(4.8, 6.75 - w_idx * 0.7, label, fontsize=FS_TINY, va='center')

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_region_growing.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print(" ✓ q23_region_growing.png")
|
||
|
||
|
||
# ============================================================
|
||
# 12. DIY THRESHOLDING — Step-by-step example
|
||
# ============================================================
|
||
def generate_diy_thresholding():
    """Render the six-panel step-by-step thresholding / Otsu figure.

    A noisy synthetic 64×64 image (dark disc on a bright background) is
    built, then shown together with its histogram, a fixed T=128 threshold,
    the within-class variance curve over candidate thresholds, Otsu
    pseudocode, and the result for the threshold with minimal variance.
    The result is written to ``q23_diy_thresholding.png`` in ``OUTPUT_DIR``.
    """
    fig, axes = plt.subplots(2, 3, figsize=(11, 7))
    (ax_input, ax_hist, ax_fixed), (ax_curve, ax_code, ax_otsu) = axes

    np.random.seed(42)

    # Create a simple synthetic image: dark circle on bright background
    side = 64
    img = np.ones((side, side)) * 200  # bright background
    rows, cols = np.mgrid[:side, :side]
    disc = ((cols - 32)**2 + (rows - 32)**2) < 15**2
    img[disc] = 60  # dark circle
    # Add some noise, then bring the values back into the 0..255 range
    img += np.random.normal(0, 10, img.shape)
    img = np.clip(img, 0, 255)

    # --- Panel 1: Original image ---
    ax_input.imshow(img, cmap='gray', vmin=0, vmax=255)
    ax_input.set_title('Krok 1: obraz wejściowy', fontsize=FS, fontweight='bold')
    ax_input.axis('off')
    ax_input.text(32, -3, '64×64 pikseli, szare', fontsize=FS_TINY, ha='center')

    # --- Panel 2: Histogram ---
    counts, _edges, _bars = ax_hist.hist(img.ravel(), bins=50, color=GRAY3,
                                         edgecolor=GRAY5, linewidth=0.5)
    ax_hist.axvline(x=128, color=RED_ACCENT, linewidth=2, linestyle='--', label='T=128 (Otsu)')
    ax_hist.set_xlabel('Jasność', fontsize=FS_SMALL)
    ax_hist.set_ylabel('Piksele', fontsize=FS_SMALL)
    ax_hist.set_title('Krok 2: histogram\n(bimodalny!)', fontsize=FS, fontweight='bold')
    ax_hist.legend(fontsize=FS_TINY)
    # Labels for the two humps, placed at half the tallest bar.
    ax_hist.annotate('Garb 1\n(obiekt)', xy=(60, max(counts)*0.5), fontsize=FS_TINY, ha='center',
                     color=ACCENT, fontweight='bold')
    ax_hist.annotate('Garb 2\n(tło)', xy=(200, max(counts)*0.5), fontsize=FS_TINY, ha='center',
                     color=RED_ACCENT, fontweight='bold')

    # --- Panel 3: Thresholding result ---
    fixed_binary = (img > 128).astype(float)
    ax_fixed.imshow(fixed_binary, cmap='gray', vmin=0, vmax=1)
    ax_fixed.set_title('Krok 3: progowanie T=128', fontsize=FS, fontweight='bold')
    ax_fixed.axis('off')
    ax_fixed.text(32, -3, 'Biały = tło, Czarny = obiekt', fontsize=FS_TINY, ha='center')

    # --- Panel 4: What Otsu does (variance plot) ---
    def within_class_variance(t):
        """Weighted sum of the two class variances for threshold t (NaN if a class is empty)."""
        below = img[img <= t].ravel()
        above = img[img > t].ravel()
        if len(below) == 0 or len(above) == 0:
            return np.nan
        weight_below = len(below) / img.size
        weight_above = len(above) / img.size
        return weight_below * np.var(below) + weight_above * np.var(above)

    candidates = list(range(10, 245))
    variances = [within_class_variance(t) for t in candidates]

    ax_curve.plot(candidates, variances, color=ACCENT, linewidth=1.5)
    # NaN entries (one-sided splits) are ignored when looking for the minimum.
    best_t = candidates[np.nanargmin(variances)]
    ax_curve.axvline(x=best_t, color=RED_ACCENT, linewidth=1.5, linestyle='--',
                     label=f'Otsu T={best_t}')
    ax_curve.scatter([best_t], [np.nanmin(variances)], c=RED_ACCENT, s=60, zorder=5)
    ax_curve.set_xlabel('Próg T', fontsize=FS_SMALL)
    ax_curve.set_ylabel('σ² wewnątrzklasowa', fontsize=FS_SMALL)
    ax_curve.set_title('Krok 4: Otsu szuka min σ²', fontsize=FS, fontweight='bold')
    ax_curve.legend(fontsize=FS_TINY)

    # --- Panel 5: Pseudocode ---
    ax_code.set_xlim(0, 10)
    ax_code.set_ylim(0, 10)
    ax_code.axis('off')
    ax_code.set_title('Pseudokod Otsu', fontsize=FS, fontweight='bold')

    code_lines = [
        'best_T = 0',
        'min_var = ∞',
        '',
        'for T in 0..255:',
        ' c0 = piksele z jasność ≤ T',
        ' c1 = piksele z jasność > T',
        ' w0 = len(c0) / len(all)',
        ' w1 = len(c1) / len(all)',
        ' var = w0·var(c0) + w1·var(c1)',
        ' if var < min_var:',
        ' min_var = var',
        ' best_T = T',
        '',
        'return best_T # optymalny próg',
    ]
    for row_idx, code in enumerate(code_lines):
        # The lines that record or return the best threshold are emphasised.
        emphasised = 'best_T = T' in code or 'return' in code
        ax_code.text(0.5, 9.5 - row_idx * 0.65, code, fontsize=FS_TINY, fontfamily='monospace',
                     color=ACCENT if emphasised else BLACK,
                     fontweight='bold' if emphasised else 'normal')

    # --- Panel 6: Final result with Otsu ---
    otsu_binary = (img > best_t).astype(float)
    ax_otsu.imshow(otsu_binary, cmap='gray', vmin=0, vmax=1)
    ax_otsu.set_title(f'Krok 5: wynik Otsu (T={best_t})', fontsize=FS, fontweight='bold')
    ax_otsu.axis('off')
    ax_otsu.text(32, -3, 'Automatyczny próg!', fontsize=FS_TINY, ha='center',
                 color=GREEN_ACCENT, fontweight='bold')

    plt.tight_layout()
    destination = os.path.join(OUTPUT_DIR, 'q23_diy_thresholding.png')
    plt.savefig(destination, dpi=DPI, bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_diy_thresholding.png")
|
||
|
||
|
||
# ============================================================
|
||
# 13. DIY U-NET — Simplified step-by-step
|
||
# ============================================================
|
||
def generate_diy_unet():
    """Render the six-panel simplified U-Net walkthrough and save it.

    The panels show: a synthetic RGB input with two discs, the shrinking
    encoder stack, a bottleneck note with four small random feature maps, the
    growing decoder stack, the ground-truth mask used as the segmentation
    map, and U-Net pseudocode. The result is written to ``q23_diy_unet.png``
    inside ``OUTPUT_DIR``.
    """
    # Imported from its documented home instead of being reached through
    # ``plt.cm.colors``, which only works because matplotlib.cm happens to
    # import the colors module internally.
    from matplotlib.colors import ListedColormap

    fig, axes = plt.subplots(2, 3, figsize=(11, 7))

    np.random.seed(42)
    size = 64

    # Create synthetic image with two regions
    img = np.ones((size, size, 3), dtype=np.uint8) * 200  # bright bg
    yy, xx = np.mgrid[:size, :size]
    # Dark region (object 1)
    mask1 = ((xx - 20)**2 + (yy - 30)**2) < 12**2
    img[mask1] = [60, 60, 60]
    # Medium region (object 2)
    mask2 = ((xx - 45)**2 + (yy - 25)**2) < 8**2
    img[mask2] = [120, 120, 120]

    # Ground-truth label map: 0 = background, 1 and 2 = the two discs.
    gt = np.zeros((size, size), dtype=np.uint8)
    gt[mask1] = 1  # class 1
    gt[mask2] = 2  # class 2

    def setup_canvas(ax, title):
        """Turn an axes into a blank 10×10 drawing area with a bold title."""
        ax.set_xlim(0, 10)
        ax.set_ylim(0, 10)
        ax.axis('off')
        ax.set_title(title, fontsize=FS, fontweight='bold')

    def draw_stack(ax, levels, fill, accent, step_text, caption_for):
        """Draw a top-to-bottom stack of boxes whose width scales with resolution.

        ``levels`` holds ``(side, channels)`` pairs; ``caption_for(idx, side,
        channels)`` returns the text inside each box. Every box except the
        last gets a short arrow below it and ``step_text`` beside the arrow.
        """
        box_h = 0.8
        y_box = 8.5
        last = len(levels) - 1
        for idx, (side, channels) in enumerate(levels):
            box_w = side / 64 * 4
            ax.add_patch(FancyBboxPatch((5 - box_w/2, y_box), box_w, box_h,
                                        boxstyle="round,pad=0.05", facecolor=fill,
                                        edgecolor=accent, linewidth=1))
            ax.text(5, y_box + box_h/2, caption_for(idx, side, channels),
                    ha='center', va='center', fontsize=FS_SMALL, fontweight='bold')
            if idx < last:
                ax.annotate('', xy=(5, y_box - 0.3), xytext=(5, y_box),
                            arrowprops=dict(arrowstyle='->', color=accent, lw=1.5))
                ax.text(7, y_box - 0.15, step_text, fontsize=FS_TINY, color=accent)
            y_box -= 2.2

    # --- Panel 1: Input image ---
    ax = axes[0, 0]
    ax.imshow(img)
    ax.set_title('Krok 1: obraz RGB\n64×64×3', fontsize=FS, fontweight='bold')
    ax.axis('off')

    # --- Panel 2: Encoder shrinks ---
    ax = axes[0, 1]
    setup_canvas(ax, 'Krok 2: Encoder ZMNIEJSZA')
    sizes = [(64, 3), (32, 64), (16, 128), (8, 256)]
    draw_stack(ax, sizes, ACCENT_LIGHT, ACCENT, 'Conv+Pool',
               lambda idx, s, c: f'{s}×{s}×{c}')
    ax.text(5, 0.3, 'Wyciąga cechy:\nkrawędzie → tekstury → obiekty',
            ha='center', fontsize=FS_TINY, color=GRAY5)

    # --- Panel 3: Bottleneck ---
    ax = axes[0, 2]
    setup_canvas(ax, 'Krok 3: Bottleneck\n(najbardziej abstrakcyjne cechy)')

    # Show small abstract feature maps as a 2x2 block of inset axes.
    # NOTE(review): these insets are positioned in figure coordinates before
    # tight_layout() runs; confirm the placement in the rendered image.
    for k in range(4):
        small = np.random.rand(4, 4)
        ax_inset = fig.add_axes([0.68 + (k % 2) * 0.08, 0.72 - (k // 2) * 0.1, 0.06, 0.06])
        ax_inset.imshow(small, cmap='gray')
        ax_inset.axis('off')

    ax.text(5, 5, '8×8×256\n\nMałe mapy, ale DUŻO kanałów\nKażdy kanał = jedna „cecha"\n'
            '(np. kanał 42 = „wykrył koło"\n kanał 78 = „wykrył krawędź")\n\n'
            'Wie CO jest na obrazie\nale nie wie GDZIE dokładnie',
            ha='center', va='center', fontsize=FS_SMALL,
            bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GRAY3))

    # --- Panel 4: Decoder enlarges ---
    ax = axes[1, 0]
    setup_canvas(ax, 'Krok 4: Decoder ZWIĘKSZA\n(+ skip connections!)')
    sizes_dec = [(8, 256), (16, 128), (32, 64), (64, 3)]
    last_dec = len(sizes_dec) - 1
    # Every level except the final one carries the ' + skip!' suffix.
    draw_stack(ax, sizes_dec, '#C8E6C9', GREEN_ACCENT, 'UpConv+Concat',
               lambda idx, s, c: f'{s}×{s}×{c}' + (' + skip!' if idx < last_dec else ''))
    ax.text(5, 0.3, 'Odtwarza rozdzielczość:\nskip → przywraca krawędzie',
            ha='center', fontsize=FS_TINY, color=GRAY5)

    # --- Panel 5: Output segmentation map ---
    ax = axes[1, 1]
    cmap = ListedColormap([WHITE, ACCENT_LIGHT, '#FFCDD2'])
    ax.imshow(gt, cmap=cmap, interpolation='nearest')
    ax.set_title('Krok 5: mapa segmentacji\n64×64 (3 klasy)', fontsize=FS, fontweight='bold')
    ax.axis('off')
    ax.text(20, -3, 'Tło=0, obiekt A=1, obiekt B=2', fontsize=FS_TINY, ha='center')

    # --- Panel 6: Summary pseudocode ---
    ax = axes[1, 2]
    setup_canvas(ax, 'Pseudokod U-Net')

    code_lines = [
        '# ENCODER',
        'e1 = conv_block(input, 64) # 64×64',
        'e2 = conv_block(pool(e1), 128) # 32×32',
        'e3 = conv_block(pool(e2), 256) # 16×16',
        '',
        '# BOTTLENECK',
        'b = conv_block(pool(e3), 512) # 8×8',
        '',
        '# DECODER + SKIP',
        'd3 = conv_block(concat(',
        ' upconv(b), e3), 256) # 16×16',
        'd2 = conv_block(concat(',
        ' upconv(d3), e2), 128) # 32×32',
        'd1 = conv_block(concat(',
        ' upconv(d2), e1), 64) # 64×64',
        '',
        'output = conv_1x1(d1, n_classes)',
    ]
    for i, line in enumerate(code_lines):
        # Lines containing 'concat' are accented; the output line is green.
        color = ACCENT if 'concat' in line else (GREEN_ACCENT if 'output' in line else BLACK)
        ax.text(0.3, 9.5 - i * 0.55, line, fontsize=FS_TINY, fontfamily='monospace',
                color=color)

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_diy_unet.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print(" ✓ q23_diy_unet.png")
|
||
|
||
|
||
# ============================================================
|
||
# 14. MNEMONICS — Visual mnemonic summary
|
||
# ============================================================
|
||
def generate_mnemonics():
    """Draw the mnemonic summary sheet and save it as ``q23_mnemonics.png``.

    The figure uses one axes spanning 20 x 16 data units with the frame
    hidden. It contains three groups of rounded cards (classical strategies,
    neural networks, metrics/loss), each under a coloured heading, and a
    large summary box at the bottom holding the master mnemonic. The image is
    written into ``OUTPUT_DIR`` at ``DPI`` resolution and a confirmation line
    is printed afterwards.
    """
    _fig, canvas = plt.subplots(figsize=(10, 8))
    canvas.set_xlim(0, 20)
    canvas.set_ylim(0, 16)
    canvas.axis('off')
    canvas.set_title('Mnemoniki — segmentacja obrazu', fontsize=FS_TITLE + 2,
                     fontweight='bold')

    def _card(left, bottom, width, height, heading, body, fill, footnote=''):
        # One rounded card: bold heading at the top, italic mnemonic in the
        # middle, optional small footnote along the bottom edge.
        canvas.add_patch(FancyBboxPatch((left, bottom), width, height,
                                        boxstyle="round,pad=0.15",
                                        facecolor=fill, edgecolor=BLACK,
                                        linewidth=1))
        mid_x = left + width/2
        canvas.text(mid_x, bottom + height - 0.3, heading, ha='center',
                    va='top', fontsize=FS, fontweight='bold')
        canvas.text(mid_x, bottom + height/2 - 0.1, body, ha='center',
                    va='center', fontsize=FS_SMALL, fontstyle='italic',
                    color=GRAY6)
        if not footnote:
            return
        canvas.text(mid_x, bottom + 0.4, footnote, ha='center', va='bottom',
                    fontsize=FS_TINY, color=GRAY5)

    card_width = 4.5
    # Each section: heading position/text/colour, card fill, card height,
    # then (x, y, title, mnemonic, detail) for every card in drawing order.
    sections = (
        (5, 15.5, 'STRATEGIE KLASYCZNE', ACCENT, ACCENT_LIGHT, 2.5, (
            (0.2, 12.5, 'Thresholding',
             '„PRÓG na bramce"\nPrzepuszcza > T,\nblokuje ≤ T',
             'jasne=1, ciemne=0'),
            (5, 12.5, 'Otsu',
             '„AUTO-bramkarz"\nSam dobiera próg\nmin σ² wewnątrz',
             'histogram bimodalny'),
            (0.2, 9.5, 'Region Growing',
             '„PLAMA rozlana"\nSeed → BFS po\npodobnych sąsiadach',
             'jak atrament na papierze'),
            (5, 9.5, 'Watershed',
             '„ZALEWANIE terenu"\nDoliny=obiekty\nGranie=granice',
             'woda + geography'),
            (0.2, 6.5, 'Mean Shift',
             '„KULKI toczą się"\nKażda → max gęstości\nBez K!',
             'bandwidth = okno'),
            (5, 6.5, 'Normalized Cuts',
             '„CIĘCIE sznurków"\nGraf: tnij słabe\nkrawędzie (O(n³)!)',
             'eigenvector problem'),
        )),
        (15, 15.5, 'SIECI NEURONOWE', GREEN_ACCENT, '#C8E6C9', 2.5, (
            (10.5, 12.5, 'FCN (2015)',
             '„FC → Conv 1×1"\nPierwsza end-to-end\nDowolny rozmiar',
             'skip connections'),
            (15.3, 12.5, 'U-Net (2015)',
             '„Litera U"\nEncoder↓ Decoder↑\nSkip = concat',
             'medycyna, małe dane'),
            (10.5, 9.5, 'DeepLab v3+',
             '„DZIURY w filtrze"\nAtrous conv (rate)\nASPP multi-scale',
             'à trous = z dziurami'),
            (15.3, 9.5, 'Transformer',
             '„WSZYSCY ze\nWSZYSTKIMI"\nSelf-attention O(n²)',
             'SegFormer, Mask2Former'),
        )),
        (10, 8.3, 'METRYKI I LOSS', RED_ACCENT, '#FFCDD2', 1.6, (
            (10.5, 6.5, 'mIoU',
             '„Nakładka / Suma"\nIoU = A∩B / A∪B',
             ''),
            (15.3, 6.5, 'Dice / Focal',
             '„Dice=2·nakładka"\nFocal=trudne px',
             ''),
        )),
    )

    for head_x, head_y, label, tint, fill, card_height, entries in sections:
        canvas.text(head_x, head_y, label, fontsize=FS_TITLE,
                    fontweight='bold', color=tint, ha='center')
        for left, bottom, heading, body, footnote in entries:
            _card(left, bottom, card_width, card_height, heading, body, fill,
                  footnote)

    # Summary box at the bottom with the master mnemonic.
    canvas.add_patch(FancyBboxPatch((1, 0.3), 18, 5.5,
                                    boxstyle="round,pad=0.2",
                                    facecolor=GRAY1, edgecolor=BLACK,
                                    linewidth=1.5))

    # (y, text, extra text properties); every row is centred on x = 10.
    summary_rows = (
        (5.3, 'SUPER-MNEMONIK: kolejność algorytmów segmentacji',
         {'fontsize': FS, 'fontweight': 'bold'}),
        (4.5, '„TORW-MN FUD-T"',
         {'fontsize': FS_TITLE + 2, 'fontweight': 'bold', 'color': RED_ACCENT}),
        (3.5, 'Klasyczne: Thresholding → Otsu → Region growing → Watershed → Mean shift → Norm. cuts',
         {'fontsize': FS_SMALL}),
        (2.8, 'Neuronowe: FCN → U-Net → DeepLab → Transformer',
         {'fontsize': FS_SMALL}),
        (1.8, '„Turyści Oglądają Rzekę, Wodospad, Morze, Nurt — Fotografują Uroczy Dwór Tajemnic"',
         {'fontsize': FS_SMALL, 'fontstyle': 'italic', 'color': ACCENT}),
        (1.0, 'Klasyczne: proste→auto→BFS→flood→gęstość→graf | Neuronowe: FC→U-skip→dilated→attention',
         {'fontsize': FS_TINY, 'color': GRAY5}),
    )
    for row_y, caption, props in summary_rows:
        canvas.text(10, row_y, caption, ha='center', **props)

    plt.tight_layout()
    target = os.path.join(OUTPUT_DIR, 'q23_mnemonics.png')
    plt.savefig(target, dpi=DPI, bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_mnemonics.png")
|
||
|
||
|
||
# ============================================================
|
||
# MAIN
|
||
# ============================================================
|
||
if __name__ == '__main__':
    # Script entry point: run every diagram generator once, in a fixed order,
    # then report the directory the images were written to.
    print("Generating PYTANIE 23 diagrams...")
    generators = (
        generate_otsu_bimodal,
        generate_watershed,
        generate_mean_shift,
        generate_normalized_cuts,
        generate_relu,
        generate_dot_product,
        generate_fcn,
        generate_unet,
        generate_receptive_field,
        generate_transformer,
        generate_region_growing,
        generate_diy_thresholding,
        generate_diy_unet,
        generate_mnemonics,
    )
    for render in generators:
        render()
    print(f"\nAll diagrams saved to: {OUTPUT_DIR}")
|