praca_magisterska/pytania/generate_q23_diagrams.py

1638 lines
69 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Generate all diagrams for PYTANIE 23: Segmentacja obrazu.
A4-compatible, monochrome-friendly (grays + one accent), 300 DPI.
"""
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.patches as patches
from matplotlib.patches import FancyArrowPatch, FancyBboxPatch
import numpy as np
import os
# Resolution (dots per inch) used as the ``dpi`` of the saved figures.
DPI = 300
# Output folder: an 'img' directory located next to this script.
# It is created when this module is executed, if it does not exist yet.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'img')
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Color palette — monochrome-friendly
BLACK = '#000000'
WHITE = '#FFFFFF'
# Gray ramp: the hex values run from the lightest (GRAY1) to the darkest (GRAY6).
GRAY1 = '#F5F5F5'
GRAY2 = '#E0E0E0'
GRAY3 = '#BDBDBD'
GRAY4 = '#9E9E9E'
GRAY5 = '#757575'
GRAY6 = '#424242'
ACCENT = '#4A90D9' # single blue accent for highlights
# Pale variant of the blue accent (used as a fill color by the diagrams).
ACCENT_LIGHT = '#B3D4FC'
# Additional red/green highlight colors.
RED_ACCENT = '#D32F2F'
GREEN_ACCENT = '#388E3C'
# Font sizes shared by the diagrams: body text, titles, small and tiny annotations.
FS = 9
FS_TITLE = 11
FS_SMALL = 7
FS_TINY = 6
# ============================================================
# 1. OTSU — Bimodal histogram + within-class variance
# ============================================================
def generate_otsu_bimodal():
    """Render the three-panel Otsu thresholding figure and save it as a PNG.

    Panels, left to right:
      1. histogram of synthetic bimodal brightness samples with a threshold
         line drawn at T=128 (the value is illustrative, it is not computed),
      2. a worked text example of the within-class variance,
      3. a scatter sketch contrasting compact classes with overlapping ones.

    Side effects: reseeds NumPy's global random generator (seeds 42 and 7),
    writes ``q23_otsu_bimodal.png`` into ``OUTPUT_DIR`` and prints a
    confirmation line. Returns ``None``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 3.5))

    # --- Panel 1: Bimodal histogram ---
    ax = axes[0]
    np.random.seed(42)
    dark = np.random.normal(60, 20, 3000).clip(0, 255)
    bright = np.random.normal(190, 25, 2000).clip(0, 255)
    all_pixels = np.concatenate([dark, bright])
    threshold = 128
    # Only the bin counts are needed; bin edges and bar artists are discarded.
    counts, _, _ = ax.hist(all_pixels, bins=64, color=GRAY3, edgecolor=GRAY5, linewidth=0.5)
    peak = max(counts)  # tallest bin; every vertical placement below is relative to it
    ax.axvline(x=threshold, color=RED_ACCENT, linewidth=2, linestyle='--',
               label='Próg Otsu T={}'.format(threshold))
    ax.fill_betweenx([0, peak * 1.1], 0, threshold, alpha=0.12, color=ACCENT)
    ax.fill_betweenx([0, peak * 1.1], threshold, 255, alpha=0.12, color=RED_ACCENT)
    ax.text(45, peak * 0.85, 'Klasa 0\n(tło)', ha='center', fontsize=FS,
            fontweight='bold', color=ACCENT)
    ax.text(195, peak * 0.85, 'Klasa 1\n(obiekt)', ha='center', fontsize=FS,
            fontweight='bold', color=RED_ACCENT)
    ax.annotate('Garb 1', xy=(60, peak * 0.6), fontsize=FS_SMALL, ha='center',
                arrowprops=dict(arrowstyle='->', color=GRAY5), xytext=(30, peak * 0.45))
    ax.annotate('Garb 2', xy=(190, peak * 0.5), fontsize=FS_SMALL, ha='center',
                arrowprops=dict(arrowstyle='->', color=GRAY5), xytext=(220, peak * 0.35))
    # Fixed label: the range separator was missing ("0255").
    ax.set_xlabel('Jasność piksela (0-255)', fontsize=FS)
    ax.set_ylabel('Liczba pikseli', fontsize=FS)
    ax.set_title('Histogram bimodalny', fontsize=FS_TITLE, fontweight='bold')
    ax.legend(fontsize=FS_SMALL, loc='upper right')
    ax.set_xlim(0, 255)

    # --- Panel 2: Within-class variance explanation ---
    ax = axes[1]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Wariancja wewnątrzklasowa', fontsize=FS_TITLE, fontweight='bold')
    y = 9.2
    # Each entry: (text, font size, color, font weight); an empty text is a spacer.
    # Arithmetic fixed: the variance of 30, 50, 45, 60, 55 around 48 is
    # (324 + 4 + 9 + 144 + 49) / 5 = 106, hence 0.6*106 + 0.4*100 = 103.6.
    texts = [
        ('Wariancja = jak bardzo wartości\nróżnią się od średniej', FS, 'black', 'normal'),
        ('', 0, 'black', 'normal'),
        ('Klasa 0 (piksele ≤ T):', FS, ACCENT, 'bold'),
        (' wartości: 30, 50, 45, 60, 55', FS_SMALL, 'black', 'normal'),
        (' średnia μ₀ = 48', FS_SMALL, 'black', 'normal'),
        (' σ₀² = ((30-48)²+(50-48)²+...)/5 = 106', FS_SMALL, 'black', 'normal'),
        ('', 0, 'black', 'normal'),
        ('Klasa 1 (piksele > T):', FS, RED_ACCENT, 'bold'),
        (' wartości: 180, 200, 190, 210, 195', FS_SMALL, 'black', 'normal'),
        (' średnia μ₁ = 195', FS_SMALL, 'black', 'normal'),
        (' σ₁² = ((180-195)²+...)/5 = 100', FS_SMALL, 'black', 'normal'),
        ('', 0, 'black', 'normal'),
        ('σ²_wewnątrz = w₀·σ₀² + w₁·σ₁²', FS, BLACK, 'bold'),
        ('= 0.6·106 + 0.4·100 = 103.6', FS_SMALL, 'black', 'normal'),
        ('', 0, 'black', 'normal'),
        ('Otsu próbuje KAŻDE T: 0,1,...,255', FS_SMALL, GREEN_ACCENT, 'bold'),
        ('Wybiera T dające MINIMUM σ²_wewnątrz', FS_SMALL, GREEN_ACCENT, 'bold'),
    ]
    for txt, size, color, weight in texts:
        if txt == '':
            y -= 0.25
            continue
        # No explicit transform: the text must live in data coordinates (0..10),
        # like the text panels of the other figures. The previous
        # ``transform=None`` overrode the data transform Axes.text sets by default.
        ax.text(0.3, y, txt, fontsize=size, color=color, fontweight=weight, va='top')
        y -= 0.55

    # --- Panel 3: homogeneity explanation ---
    ax = axes[2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('"Jednorodne" = małe σ²', fontsize=FS_TITLE, fontweight='bold')
    np.random.seed(7)
    # Good separation: two tight clusters on either side of the threshold line.
    c0 = np.random.normal(2, 0.4, 15)
    c1 = np.random.normal(7, 0.4, 15)
    y_pos_0 = np.random.uniform(6, 8, 15)
    y_pos_1 = np.random.uniform(6, 8, 15)
    ax.scatter(c0, y_pos_0, c=ACCENT, s=30, zorder=5, label='Klasa 0')
    ax.scatter(c1, y_pos_1, c=RED_ACCENT, s=30, zorder=5, label='Klasa 1')
    ax.axvline(x=4.5, color=GREEN_ACCENT, linewidth=2, linestyle='--')
    ax.text(4.5, 8.8, 'T optymalny', ha='center', fontsize=FS_SMALL,
            color=GREEN_ACCENT, fontweight='bold')
    ax.text(2, 5.3, 'σ₀² mała\n(skupione)', ha='center', fontsize=FS_SMALL, color=ACCENT)
    ax.text(7, 5.3, 'σ₁² mała\n(skupione)', ha='center', fontsize=FS_SMALL, color=RED_ACCENT)
    ax.text(5, 4, 'σ²_wewnątrz MINIMALNA\n→ klasy JEDNORODNE\n→ dobra segmentacja!',
            ha='center', fontsize=FS, fontweight='bold', color=GREEN_ACCENT)
    # Bad separation: two wide clusters that overlap (drawn with 'x' markers).
    c0b = np.random.normal(3.5, 1.5, 15)
    c1b = np.random.normal(6, 1.5, 15)
    y_pos_0b = np.random.uniform(1, 3, 15)
    y_pos_1b = np.random.uniform(1, 3, 15)
    ax.scatter(c0b, y_pos_0b, c=ACCENT, s=30, marker='x', zorder=5)
    ax.scatter(c1b, y_pos_1b, c=RED_ACCENT, s=30, marker='x', zorder=5)
    ax.axvline(x=4.5, color=GRAY4, linewidth=1, linestyle=':', ymin=0, ymax=0.35)
    ax.text(5, 0.3, 'σ²_wewnątrz DUŻA → klasy mieszają się → zły próg',
            ha='center', fontsize=FS_SMALL, color=GRAY5)
    ax.legend(fontsize=FS_SMALL, loc='upper left')

    fig.tight_layout()
    fig.savefig(os.path.join(OUTPUT_DIR, 'q23_otsu_bimodal.png'), dpi=DPI,
                bbox_inches='tight', facecolor='white')
    plt.close(fig)
    print(" ✓ q23_otsu_bimodal.png")
# ============================================================
# 2. WATERSHED — Topographic flooding (not ASCII!)
# ============================================================
def generate_watershed():
    """Render the three-panel watershed figure and save it as a PNG.

    Panels, left to right:
      1. a 1-D brightness profile drawn as terrain with two valleys and a ridge,
      2. the same profile with both valleys filled up to a fixed water level,
      3. text boxes: ideal result, over-segmentation, marker-controlled variant.

    Side effects: writes ``q23_watershed.png`` into ``OUTPUT_DIR`` and prints a
    confirmation line. Returns ``None``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 3.8))

    # --- Panel 1: Image as topographic surface ---
    ax = axes[0]
    x = np.linspace(0, 10, 200)
    # Two Gaussian bumps plus a small ripple ...
    surface = 3 * np.exp(-((x - 3)**2) / 1.5) + 4 * np.exp(-((x - 7)**2) / 1.2) + \
        0.5 * np.sin(x * 2) + 1
    # ... inverted, so the bumps become valleys and the gap between them a ridge.
    surface_inv = 6 - surface + 1
    ax.fill_between(x, 0, surface_inv, color=GRAY2, alpha=0.7)
    ax.plot(x, surface_inv, color=BLACK, linewidth=1.5)
    # Sample indices 60 / 140 / 100 are the grid points closest to x = 3 / 7 / 5
    # on the 200-point grid; they give the arrow tips the terrain height there.
    ax.annotate('Dolina 1\n(obiekt A)', xy=(3, surface_inv[60]), fontsize=FS_SMALL,
                ha='center', va='bottom',
                arrowprops=dict(arrowstyle='->', color=ACCENT), xytext=(1.5, 5.5))
    ax.annotate('Dolina 2\n(obiekt B)', xy=(7, surface_inv[140]), fontsize=FS_SMALL,
                ha='center', va='bottom',
                arrowprops=dict(arrowstyle='->', color=RED_ACCENT), xytext=(8.5, 5.5))
    ax.annotate('Grań\n(granica)', xy=(5, surface_inv[100]), fontsize=FS_SMALL,
                ha='center', va='bottom',
                arrowprops=dict(arrowstyle='->', color=GREEN_ACCENT), xytext=(5, 6.5))
    ax.set_xlabel('Pozycja piksela', fontsize=FS)
    ax.set_ylabel('Jasność (= wysokość)', fontsize=FS)
    ax.set_title('Krok 1: obraz → teren', fontsize=FS_TITLE, fontweight='bold')
    ax.set_ylim(0, 7)

    # --- Panel 2: Flooding ---
    ax = axes[1]
    ax.fill_between(x, 0, surface_inv, color=GRAY2, alpha=0.7)
    ax.plot(x, surface_inv, color=BLACK, linewidth=1.5)
    water_level = 3.2
    # Each valley is flooded separately so it can get its own tint. The masks
    # are computed once (the unused water_mask_* locals were dropped).
    valleys = (
        ((x > 1) & (x < 5), ACCENT_LIGHT),
        ((x > 5) & (x < 9), '#FFCDD2'),
    )
    for in_valley, tint in valleys:
        floor = surface_inv[in_valley]
        ax.fill_between(x[in_valley], floor, water_level, where=floor < water_level,
                        color=tint, alpha=0.6)
    ax.axhline(y=water_level, color=ACCENT, linewidth=1, linestyle='--', alpha=0.5)
    ax.text(3, 2.5, 'Woda A', fontsize=FS, ha='center', color=ACCENT, fontweight='bold')
    ax.text(7, 2.2, 'Woda B', fontsize=FS, ha='center', color=RED_ACCENT, fontweight='bold')
    ax.annotate('Tu się spotkają!\n→ GRANICA', xy=(5, surface_inv[100]), fontsize=FS_SMALL,
                ha='center', color=GREEN_ACCENT, fontweight='bold',
                arrowprops=dict(arrowstyle='->', color=GREEN_ACCENT), xytext=(5, 6.2))
    ax.set_xlabel('Pozycja piksela', fontsize=FS)
    ax.set_title('Krok 2: zalewanie', fontsize=FS_TITLE, fontweight='bold')
    ax.set_ylim(0, 7)

    # --- Panel 3: Result with problem ---
    ax = axes[2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Krok 3: wynik', fontsize=FS_TITLE, fontweight='bold')
    # Good result
    rect1 = FancyBboxPatch((0.5, 6), 3.5, 3.2, boxstyle="round,pad=0.1",
                           facecolor=ACCENT_LIGHT, edgecolor=BLACK, linewidth=1)
    ax.add_patch(rect1)
    ax.text(2.25, 8.8, 'Ideał: 2 segmenty', fontsize=FS, ha='center', fontweight='bold')
    ax.text(2.25, 7.5, 'Segment A Segment B', fontsize=FS_SMALL, ha='center')
    ax.text(2.25, 6.7, '(po marker-controlled)', fontsize=FS_SMALL, ha='center',
            color=GREEN_ACCENT)
    # Bad result (over-segmentation)
    rect2 = FancyBboxPatch((5.5, 6), 4, 3.2, boxstyle="round,pad=0.1",
                           facecolor='#FFCDD2', edgecolor=BLACK, linewidth=1)
    ax.add_patch(rect2)
    ax.text(7.5, 8.8, 'Problem: over-segmentation', fontsize=FS, ha='center',
            fontweight='bold', color=RED_ACCENT)
    ax.text(7.5, 7.8, '47 regionów zamiast 2!', fontsize=FS_SMALL, ha='center',
            color=RED_ACCENT)
    ax.text(7.5, 7.1, 'Każde mini-minimum', fontsize=FS_SMALL, ha='center')
    ax.text(7.5, 6.5, '→ osobna „dolina"', fontsize=FS_SMALL, ha='center')
    # Solution: markers
    rect3 = FancyBboxPatch((1, 0.5), 8, 4.5, boxstyle="round,pad=0.15",
                           facecolor=GRAY1, edgecolor=GREEN_ACCENT, linewidth=1.5)
    ax.add_patch(rect3)
    ax.text(5, 4.3, 'Rozwiązanie: Marker-controlled watershed', fontsize=FS,
            ha='center', fontweight='bold', color=GREEN_ACCENT)
    ax.text(5, 3.4, '1. Zaznacz ręcznie „seeds" (markery) w każdym obiekcie',
            fontsize=FS_SMALL, ha='center')
    ax.text(5, 2.7, '2. Zalewaj TYLKO od tych markerów (nie od wszystkich minimów)',
            fontsize=FS_SMALL, ha='center')
    ax.text(5, 2.0, '3. Eliminuje fałszywe doliny z szumu', fontsize=FS_SMALL, ha='center')
    ax.text(5, 1.2, 'Wynik: tyle segmentów, ile podano markerów', fontsize=FS_SMALL,
            ha='center', fontweight='bold')

    fig.tight_layout()
    fig.savefig(os.path.join(OUTPUT_DIR, 'q23_watershed.png'), dpi=DPI,
                bbox_inches='tight', facecolor='white')
    plt.close(fig)
    print(" ✓ q23_watershed.png")
# ============================================================
# 3. MEAN SHIFT — Kernel, density, feature space
# ============================================================
def generate_mean_shift():
    """Draw the three-panel Mean Shift figure and store it as a PNG.

    Panels, left to right:
      1. three synthetic point clusters in a 2-D feature space with their
         density maxima marked by stars,
      2. the same clusters with a circular window stepping towards one maximum,
      3. a text panel comparing Mean Shift with K-means.

    Side effects: reseeds NumPy's global random generator (seed 42), writes
    ``q23_mean_shift.png`` into ``OUTPUT_DIR`` and prints a confirmation line.
    Returns ``None``.
    """
    figure, panels = plt.subplots(1, 3, figsize=(11, 4))

    # Synthetic clusters, one row per cluster:
    # (mean of feature 1, its spread, mean of feature 2, its spread, point count).
    # Samples are drawn x-then-y per cluster, in this order, after seeding.
    np.random.seed(42)
    cluster_specs = (
        (2, 0.5, 2, 0.5, 40),
        (6, 0.6, 7, 0.5, 35),
        (8, 0.4, 3, 0.6, 25),
    )
    clusters = []
    for mean_x, spread_x, mean_y, spread_y, count in cluster_specs:
        xs = np.random.normal(mean_x, spread_x, count)
        ys = np.random.normal(mean_y, spread_y, count)
        clusters.append((xs, ys))

    # --- Panel 1: feature space ---
    feature_ax = panels[0]
    for xs, ys in clusters:
        feature_ax.scatter(xs, ys, c=GRAY4, s=15, alpha=0.7, zorder=3)
    # Star markers on the cluster centres; only the first one feeds the legend.
    for idx, (peak_x, peak_y) in enumerate(((2, 2), (6, 7), (8, 3))):
        legend_kw = {'label': 'Max gęstości'} if idx == 0 else {}
        feature_ax.scatter([peak_x], [peak_y], c=RED_ACCENT, s=80, marker='*',
                           zorder=5, **legend_kw)
    feature_ax.set_xlabel('Cecha 1: jasność', fontsize=FS)
    feature_ax.set_ylabel('Cecha 2: pozycja x', fontsize=FS)
    feature_ax.set_title('Przestrzeń cech', fontsize=FS_TITLE, fontweight='bold')
    captions = (
        (2, 0.3, 'Klaster 1\n(ciemne, lewo)'),
        (6, 5.3, 'Klaster 2\n(jasne, prawo)'),
        (8, 1.3, 'Klaster 3\n(jasne, dół)'),
    )
    for cap_x, cap_y, caption in captions:
        feature_ax.text(cap_x, cap_y, caption, ha='center', fontsize=FS_TINY, color=GRAY6)
    feature_ax.legend(fontsize=FS_SMALL, loc='upper left')

    # --- Panel 2: window (kernel) trajectory ---
    shift_ax = panels[1]
    for (xs, ys), tint in zip(clusters, (ACCENT_LIGHT, GRAY3, GRAY3)):
        shift_ax.scatter(xs, ys, c=tint, s=15, alpha=0.7, zorder=3)
    waypoints = [(4.5, 4.0), (3.8, 3.3), (3.0, 2.7), (2.3, 2.2), (2.05, 2.03)]
    last_step = len(waypoints) - 1
    for step, (win_x, win_y) in enumerate(waypoints):
        arrived = step == last_step
        # Intermediate windows are dashed and fainter; the final one is solid.
        window = plt.Circle((win_x, win_y), 1.2, fill=False, edgecolor=ACCENT,
                            linewidth=1.5, linestyle='-' if arrived else '--',
                            alpha=0.3 + 0.15 * step)
        shift_ax.add_patch(window)
        if not arrived:
            shift_ax.annotate('', xy=waypoints[step + 1], xytext=(win_x, win_y),
                              arrowprops=dict(arrowstyle='->', color=RED_ACCENT, lw=1.5))
    start_x, start_y = waypoints[0]
    end_x, end_y = waypoints[-1]
    shift_ax.scatter([start_x], [start_y], c=ACCENT, s=50, marker='o', zorder=5)
    shift_ax.scatter([end_x], [end_y], c=RED_ACCENT, s=80, marker='*', zorder=5)
    shift_ax.text(4.5, 5.2, 'Start: losowy\npiksel', fontsize=FS_SMALL, ha='center',
                  color=ACCENT)
    shift_ax.text(2.05, 0.5, 'Koniec: max\ngęstości', fontsize=FS_SMALL, ha='center',
                  color=RED_ACCENT, fontweight='bold')
    shift_ax.text(7, 8, 'Okno (jądro)\nprzesuwa się\ndo skupiska', fontsize=FS_SMALL,
                  ha='center', color=GRAY6,
                  bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GRAY3))
    shift_ax.set_xlabel('Cecha 1', fontsize=FS)
    shift_ax.set_ylabel('Cecha 2', fontsize=FS)
    shift_ax.set_title('Jądro → max gęstości', fontsize=FS_TITLE, fontweight='bold')
    shift_ax.set_xlim(0, 10)
    shift_ax.set_ylim(0, 9)

    # --- Panel 3: why no K parameter ---
    notes_ax = panels[2]
    notes_ax.set_xlim(0, 10)
    notes_ax.set_ylim(0, 10)
    notes_ax.axis('off')
    notes_ax.set_title('Dlaczego bez K?', fontsize=FS_TITLE, fontweight='bold')
    # Rows are (text, font size, color, weight); an empty text is a vertical gap.
    notes = [
        ('K-means wymaga:', FS, RED_ACCENT, 'bold'),
        (' „Podaj K=3 klastry"', FS_SMALL, 'black', 'normal'),
        (' Problem: skąd wiesz ile klastrów?', FS_SMALL, GRAY5, 'normal'),
        ('', 0, '', ''),
        ('Mean Shift NIE wymaga K:', FS, GREEN_ACCENT, 'bold'),
        (' Każdy piksel startuje → toczy się', FS_SMALL, 'black', 'normal'),
        (' → trafia do najbliższego szczytu', FS_SMALL, 'black', 'normal'),
        (' → ile szczytów = tyle segmentów', FS_SMALL, 'black', 'normal'),
        (' → automatycznie!', FS_SMALL, GREEN_ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Parametr: bandwidth (szerokość okna)', FS, 'black', 'bold'),
        (' Duże okno → mało segmentów', FS_SMALL, 'black', 'normal'),
        (' Małe okno → dużo segmentów', FS_SMALL, 'black', 'normal'),
        ('', 0, '', ''),
        ('Okno = jądro (kernel):', FS, 'black', 'bold'),
        (' Koło o promieniu h wokół punktu.', FS_SMALL, 'black', 'normal'),
        (' Oblicz średnią pikseli W oknie.', FS_SMALL, 'black', 'normal'),
        (' Przesuń okno na tę średnią.', FS_SMALL, 'black', 'normal'),
        (' Powtórz aż się zatrzyma.', FS_SMALL, 'black', 'normal'),
    ]
    cursor_y = 9.0
    for body, font_size, ink, boldness in notes:
        if body == '':
            cursor_y -= 0.2
        else:
            notes_ax.text(0.5, cursor_y, body, fontsize=font_size, color=ink,
                          fontweight=boldness, va='top')
            cursor_y -= 0.5

    plt.tight_layout()
    target = os.path.join(OUTPUT_DIR, 'q23_mean_shift.png')
    plt.savefig(target, dpi=DPI, bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_mean_shift.png")
# ============================================================
# 4. NORMALIZED CUTS — Graph cut visualization
# ============================================================
def generate_normalized_cuts():
    """Render the three-panel Normalized Cuts figure and save it as a PNG.

    Panels, left to right:
      1. a 4x4 grid of pixel brightness values with edges between neighbours
         whose thickness and opacity grow with brightness similarity,
      2. two groups of graph nodes separated by a cut line, plus the Ncut formula,
      3. a text summary of the algorithm steps.

    Side effects: writes ``q23_normalized_cuts.png`` into ``OUTPUT_DIR`` and
    prints a confirmation line. Returns ``None``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 4))

    def edge_style(value_a, value_b):
        """Return ``(linewidth, alpha)`` for an edge joining two brightness values."""
        similarity = max(0, 1 - abs(value_a - value_b) / 255)
        return similarity * 2.5 + 0.3, similarity * 0.8 + 0.2

    # --- Panel 1: Image as graph ---
    ax = axes[0]
    ax.set_xlim(-0.5, 4.5)
    ax.set_ylim(-0.5, 4.5)
    ax.set_aspect('equal')
    ax.set_title('Obraz → graf', fontsize=FS_TITLE, fontweight='bold')
    pixel_vals = np.array([
        [30, 35, 180, 190],
        [40, 30, 185, 200],
        [170, 180, 40, 35],
        [190, 175, 30, 45],
    ])
    n_rows, n_cols = pixel_vals.shape
    # Pixel tiles: row i is drawn at height 3 - i so row 0 ends up on top.
    for i in range(n_rows):
        for j in range(n_cols):
            v = pixel_vals[i, j]
            gray_val = v / 255.0
            rect = patches.Rectangle((j - 0.4, 3 - i - 0.4), 0.8, 0.8,
                                     facecolor=(gray_val, gray_val, gray_val),
                                     edgecolor=BLACK, linewidth=0.8)
            ax.add_patch(rect)
            # White digits on dark tiles, black digits on bright ones.
            text_color = 'white' if v < 100 else 'black'
            ax.text(j, 3 - i, str(v), ha='center', va='center', fontsize=FS_SMALL,
                    color=text_color, fontweight='bold')
    # Short connectors in the gaps between adjacent tiles.
    for i in range(n_rows):
        for j in range(n_cols):
            if j < n_cols - 1:  # right neighbour
                lw, alpha = edge_style(pixel_vals[i, j], pixel_vals[i, j + 1])
                ax.plot([j + 0.4, j + 0.6], [3 - i, 3 - i], color=GRAY5,
                        linewidth=lw, alpha=alpha)
            if i < n_rows - 1:  # bottom neighbour
                lw, alpha = edge_style(pixel_vals[i, j], pixel_vals[i + 1, j])
                ax.plot([j, j], [3 - i - 0.4, 3 - i - 0.6], color=GRAY5,
                        linewidth=lw, alpha=alpha)
    ax.text(2, -0.8, 'Grube linie = duże podobieństwo\n(silna krawędź grafu)',
            ha='center', fontsize=FS_TINY, color=GRAY5)
    ax.axis('off')

    # --- Panel 2: Cut concept ---
    ax = axes[1]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Cięcie grafu (graph cut)', fontsize=FS_TITLE, fontweight='bold')
    positions_A = [(2, 7), (3, 8), (2, 5), (3, 6)]
    positions_B = [(7, 7), (8, 8), (7, 5), (8, 6)]
    # Intra-group edges: every pair inside a group is joined by a thick line.
    for group, group_color in ((positions_A, ACCENT), (positions_B, RED_ACCENT)):
        for i, (x1, y1) in enumerate(group):
            for x2, y2 in group[i + 1:]:
                ax.plot([x1, x2], [y1, y2], color=group_color, linewidth=2, alpha=0.5)
    # Inter-group edges (thin, dashed) — the ones crossed by the cut line.
    cut_edges = [((3, 8), (7, 7)), ((3, 6), (7, 5)), ((2, 5), (7, 5))]
    for (x1, y1), (x2, y2) in cut_edges:
        ax.plot([x1, x2], [y1, y2], color=GRAY4, linewidth=0.8, linestyle='--')
    # Nodes
    for node_x, node_y in positions_A:
        ax.scatter(node_x, node_y, c=ACCENT, s=120, zorder=5, edgecolors=BLACK,
                   linewidth=0.8)
    for node_x, node_y in positions_B:
        ax.scatter(node_x, node_y, c='#FFCDD2', s=120, zorder=5, edgecolors=BLACK,
                   linewidth=0.8)
    # Cut line
    ax.plot([5, 5], [3.5, 9.5], color=RED_ACCENT, linewidth=2.5, linestyle='-', zorder=4)
    ax.text(5, 9.8, 'CIĘCIE', ha='center', fontsize=FS, fontweight='bold', color=RED_ACCENT)
    ax.text(2.5, 3.8, 'Segment A\n(ciemne piksele)', ha='center', fontsize=FS_SMALL,
            color=ACCENT)
    ax.text(7.5, 3.8, 'Segment B\n(jasne piksele)', ha='center', fontsize=FS_SMALL,
            color=RED_ACCENT)
    # Formula
    ax.text(5, 1.8, 'Ncut(A,B) = cut(A,B)/assoc(A,V)\n + cut(A,B)/assoc(B,V)',
            ha='center', fontsize=FS_SMALL, fontweight='bold',
            bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GRAY3))
    ax.text(5, 0.5, 'Minimalizuj Ncut → tnij SŁABE krawędzie\nzachowuj SILNE (wewnątrz grupy)',
            ha='center', fontsize=FS_TINY, color=GRAY5)

    # --- Panel 3: Algorithm summary ---
    ax = axes[2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Algorytm Normalized Cuts', fontsize=FS_TITLE, fontweight='bold')
    # (heading, description) pairs. The weight formula now writes the node
    # indices explicitly ("kolor_i", "kolor_j") instead of the run-together
    # "kolori" / "kolorj".
    steps = [
        ('1. Zbuduj graf',
         'Piksele = węzły\nKrawędzie = podobieństwo sąsiadów\n(kolor, jasność, odległość)'),
        ('2. Macierz podobieństwa W',
         'W[i,j] = exp(-|kolor_i - kolor_j|² / σ²)\n→ im podobniejsze, tym wyższa waga'),
        ('3. Macierz stopni D',
         'D[i,i] = Σ W[i,j]\n(suma wszystkich wag z węzła i)'),
        ('4. Rozwiąż problem własny',
         '(D-W)·y = λ·D·y\n→ drugi najm. wektor własny y'),
        ('5. Podziel wg y',
         'y[i] > 0 → segment A\ny[i] ≤ 0 → segment B'),
    ]
    y = 9.5
    for title, desc in steps:
        ax.text(0.5, y, title, fontsize=FS, fontweight='bold', va='top')
        y -= 0.4
        ax.text(0.8, y, desc, fontsize=FS_TINY, va='top', color=GRAY6)
        y -= 1.2
    ax.text(5, 0.3, 'Złożoność: O(n³) — wymaga eigen decomposition!',
            ha='center', fontsize=FS_SMALL, fontweight='bold', color=RED_ACCENT)

    fig.tight_layout()
    fig.savefig(os.path.join(OUTPUT_DIR, 'q23_normalized_cuts.png'), dpi=DPI,
                bbox_inches='tight', facecolor='white')
    plt.close(fig)
    print(" ✓ q23_normalized_cuts.png")
# ============================================================
# 5. RELU — Function plot
# ============================================================
def generate_relu():
    """Render the two-panel ReLU figure and save it as a PNG.

    Panels, left to right:
      1. plot of ReLU(x) = max(0, x) on [-5, 5] with the inactive (x < 0) and
         active (x >= 0) regions highlighted and annotated,
      2. a text panel with a worked neuron example.

    Side effects: writes ``q23_relu.png`` into ``OUTPUT_DIR`` and prints a
    confirmation line. Returns ``None``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(8, 3.5))

    # --- Panel 1: ReLU plot ---
    ax = axes[0]
    x = np.linspace(-5, 5, 200)
    relu = np.maximum(0, x)
    ax.plot(x, relu, color=ACCENT, linewidth=2.5, label='ReLU(x) = max(0, x)')
    ax.axhline(y=0, color=GRAY3, linewidth=0.5)
    ax.axvline(x=0, color=GRAY3, linewidth=0.5)
    # Highlight the inactive half (x < 0). The earlier fill_between(..., 0, 0)
    # enclosed zero area and therefore drew nothing; a vertical span makes the
    # red highlight visible.
    # NOTE(review): shading the full height is an assumption about the intent.
    ax.axvspan(x[0], 0, color=RED_ACCENT, alpha=0.1)
    active = x >= 0
    ax.fill_between(x[active], 0, relu[active], color=ACCENT, alpha=0.1)
    # Annotations
    ax.annotate('x < 0 → output = 0\n(neuron „wyłączony")', xy=(-3, 0),
                fontsize=FS_SMALL, ha='center', va='bottom', color=RED_ACCENT,
                arrowprops=dict(arrowstyle='->', color=RED_ACCENT), xytext=(-3, 2))
    ax.annotate('x ≥ 0 → output = x\n(neuron „włączony")', xy=(3, 3),
                fontsize=FS_SMALL, ha='center', va='bottom', color=ACCENT,
                arrowprops=dict(arrowstyle='->', color=ACCENT), xytext=(3, 4.5))
    ax.scatter([0], [0], c=BLACK, s=40, zorder=5)
    ax.text(0.3, -0.5, '(0,0)', fontsize=FS_SMALL, color=GRAY5)
    ax.set_xlabel('x (wejście neuronu)', fontsize=FS)
    ax.set_ylabel('ReLU(x)', fontsize=FS)
    ax.set_title('ReLU — Rectified Linear Unit', fontsize=FS_TITLE, fontweight='bold')
    ax.legend(fontsize=FS_SMALL, loc='upper left')
    ax.set_ylim(-1, 6)
    ax.grid(True, alpha=0.2)

    # --- Panel 2: Why ReLU ---
    ax = axes[1]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Dlaczego ReLU?', fontsize=FS_TITLE, fontweight='bold')
    y = 9.0
    # Rows are (text, font size, color, weight); an empty text is a vertical gap.
    lines = [
        ('Neuron oblicza:', FS, BLACK, 'bold'),
        (' z = w₁·x₁ + w₂·x₂ + ... + bias', FS_SMALL, BLACK, 'normal'),
        (' output = ReLU(z) = max(0, z)', FS_SMALL, ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Przykład:', FS, BLACK, 'bold'),
        (' wagi: w₁=0.5, w₂=-0.3, bias=0.1', FS_SMALL, BLACK, 'normal'),
        (' wejścia: x₁=2.0, x₂=4.0', FS_SMALL, BLACK, 'normal'),
        (' z = 0.5·2 + (-0.3)·4 + 0.1 = -0.1', FS_SMALL, BLACK, 'normal'),
        (' ReLU(-0.1) = max(0, -0.1) = 0', FS_SMALL, RED_ACCENT, 'bold'),
        (' → neuron milczy (wejście nieistotne)', FS_SMALL, GRAY5, 'normal'),
        ('', 0, '', ''),
        ('Gdyby z = 2.3:', FS, BLACK, 'bold'),
        (' ReLU(2.3) = max(0, 2.3) = 2.3', FS_SMALL, GREEN_ACCENT, 'bold'),
        (' → neuron aktywny! Przekazuje sygnał', FS_SMALL, GRAY5, 'normal'),
        ('', 0, '', ''),
        ('Szybsza niż sigmoid/tanh', FS_SMALL, GRAY5, 'normal'),
        ('(brak exp() → szybkie obliczenia)', FS_SMALL, GRAY5, 'normal'),
    ]
    for txt, size, color, weight in lines:
        if txt == '':
            y -= 0.2
            continue
        ax.text(0.5, y, txt, fontsize=size, color=color, fontweight=weight, va='top')
        y -= 0.5

    fig.tight_layout()
    fig.savefig(os.path.join(OUTPUT_DIR, 'q23_relu.png'), dpi=DPI,
                bbox_inches='tight', facecolor='white')
    plt.close(fig)
    print(" ✓ q23_relu.png")
# ============================================================
# 6. DOT PRODUCT — Iloczyn skalarny visual
# ============================================================
def generate_dot_product():
    """Render the three-panel dot-product figure and save it as a PNG.

    Panels, left to right:
      1. a text panel with the definition and a numeric example,
      2. a 3x3 filter next to a 3x3 image patch with their element-wise
         product sum written out,
      3. two 2-D vectors with the angle between them.

    Side effects: writes ``q23_dot_product.png`` into ``OUTPUT_DIR`` and prints
    a confirmation line. Returns ``None``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 3.5))

    def draw_grid(target, values, rect_dx, text_dx, fill):
        """Draw a grid of labelled 0.8x0.8 tiles; row 0 is placed at height 4."""
        for i, row in enumerate(values):
            for j, value in enumerate(row):
                tile = patches.Rectangle((j + rect_dx, 4 - i - 0.4), 0.8, 0.8,
                                         facecolor=fill, edgecolor=BLACK, linewidth=0.8)
                target.add_patch(tile)
                target.text(j + text_dx, 4 - i, str(value), ha='center', va='center',
                            fontsize=FS, fontweight='bold')

    # --- Panel 1: Concept ---
    ax = axes[0]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Iloczyn skalarny\n(dot product)', fontsize=FS_TITLE, fontweight='bold')
    y = 8.5
    # Rows are (text, font size, color, weight); an empty text is a vertical gap.
    lines = [
        ('Dwa wektory (listy liczb) → JEDNA liczba', FS, BLACK, 'bold'),
        ('', 0, '', ''),
        ('a = [a₁, a₂, a₃] b = [b₁, b₂, b₃]', FS, ACCENT, 'normal'),
        ('', 0, '', ''),
        ('a · b = a₁·b₁ + a₂·b₂ + a₃·b₃', FS, BLACK, 'bold'),
        ('', 0, '', ''),
        ('Przykład:', FS, BLACK, 'bold'),
        ('a = [1, 3, -2] b = [4, -1, 5]', FS_SMALL, BLACK, 'normal'),
        ('a·b = 1·4 + 3·(-1) + (-2)·5', FS_SMALL, BLACK, 'normal'),
        (' = 4 + (-3) + (-10) = -9', FS_SMALL, RED_ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Duży wynik → wektory „podobne" (w tym samym kierunku)', FS_SMALL, GREEN_ACCENT,
         'normal'),
        ('Mały/ujemny → wektory „różne"', FS_SMALL, RED_ACCENT, 'normal'),
    ]
    for txt, size, color, weight in lines:
        if txt == '':
            y -= 0.25
            continue
        ax.text(0.5, y, txt, fontsize=size, color=color, fontweight=weight, va='top')
        y -= 0.55

    # --- Panel 2: Convolution as dot product ---
    ax = axes[1]
    ax.set_xlim(-0.5, 5.5)
    ax.set_ylim(-0.5, 5.5)
    ax.set_aspect('equal')
    ax.set_title('Konwolucja = iloczyn skalarny\nfiltra × fragment obrazu',
                 fontsize=FS_TITLE, fontweight='bold')
    # Filter 3x3 occupies columns 0..2.
    filter_vals = [[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]]
    draw_grid(ax, filter_vals, -0.4, 0, ACCENT_LIGHT)
    ax.text(1, 1.5, 'Filtr', ha='center', fontsize=FS, fontweight='bold', color=ACCENT)
    # Image patch occupies columns 3..5.
    img_vals = [[50, 50, 200], [50, 50, 200], [50, 50, 200]]
    draw_grid(ax, img_vals, 2.6, 3, GRAY2)
    ax.text(4, 1.5, 'Fragment\nobrazu', ha='center', fontsize=FS, fontweight='bold',
            color=GRAY5)
    ax.text(2.5, 0.5,
            '(-1)·50 + 0·50 + 1·200 +\n(-1)·50 + 0·50 + 1·200 +\n(-1)·50 + 0·50 + 1·200\n'
            '= 450 (krawędź!)',
            ha='center', fontsize=FS_TINY, fontweight='bold',
            bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GREEN_ACCENT))
    ax.axis('off')

    # --- Panel 3: Vector visualization ---
    ax = axes[2]
    ax.quiver(0, 0, 3, 4, angles='xy', scale_units='xy', scale=1,
              color=ACCENT, width=0.025, label='a = [3, 4]')
    ax.quiver(0, 0, 4, 1, angles='xy', scale_units='xy', scale=1,
              color=RED_ACCENT, width=0.025, label='b = [4, 1]')
    # Arc between the directions of b and a, at radius 1.5.
    theta = np.linspace(np.arctan2(1, 4), np.arctan2(4, 3), 30)
    r = 1.5
    ax.plot(r * np.cos(theta), r * np.sin(theta), color=GREEN_ACCENT, linewidth=1.5)
    ax.text(1.8, 1.3, 'θ', fontsize=FS, color=GREEN_ACCENT, fontweight='bold')
    ax.text(3.2, 4.2, 'a', fontsize=FS, color=ACCENT, fontweight='bold')
    ax.text(4.2, 1.2, 'b', fontsize=FS, color=RED_ACCENT, fontweight='bold')
    ax.text(2.5, -1.0, 'a · b = |a|·|b|·cos(θ)\n= 3·4 + 4·1 = 16',
            ha='center', fontsize=FS_SMALL, fontweight='bold',
            bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GRAY3))
    # Typo fixed in the rendered caption: "zgadają się" -> "zgadzają się".
    ax.text(2.5, -2.0, 'Mały kąt θ → duży dot product\n= wektory „zgadzają się"',
            ha='center', fontsize=FS_TINY, color=GRAY5)
    ax.set_xlim(-0.5, 5.5)
    ax.set_ylim(-2.5, 5.5)
    ax.set_aspect('equal')
    ax.grid(True, alpha=0.2)
    ax.legend(fontsize=FS_SMALL, loc='upper left')
    ax.set_title('Geometrycznie: kąt', fontsize=FS_TITLE, fontweight='bold')

    fig.tight_layout()
    fig.savefig(os.path.join(OUTPUT_DIR, 'q23_dot_product.png'), dpi=DPI,
                bbox_inches='tight', facecolor='white')
    plt.close(fig)
    print(" ✓ q23_dot_product.png")
# ============================================================
# 7. FCN — FC vs Conv 1x1, skip connections
# ============================================================
def generate_fcn():
    """Draw the two-row "FC vs Conv 1×1" figure and store it as a PNG.

    Top axes: two left-to-right pipelines of boxes joined by arrows — a
    classic CNN ending in FC layers and an FCN ending in Conv 1×1 + upsampling.
    Bottom axes: three explanatory text boxes (FC, convolution, Conv 1×1).

    Side effects: writes ``q23_fc_vs_conv1x1.png`` into ``OUTPUT_DIR`` and
    prints a confirmation line. Returns ``None``.
    """
    figure, (pipeline_ax, notes_ax) = plt.subplots(2, 1, figsize=(10, 7))

    def draw_pipeline(stages, row_y, drop, height):
        """Draw one row of boxes (centre x, caption, width, fill) with arrows between them."""
        final = len(stages) - 1
        for idx, (centre, caption, width, fill) in enumerate(stages):
            box = FancyBboxPatch((centre - width/2, row_y - drop), width, height,
                                 boxstyle="round,pad=0.05", facecolor=fill,
                                 edgecolor=BLACK, linewidth=0.8)
            pipeline_ax.add_patch(box)
            pipeline_ax.text(centre, row_y, caption, ha='center', va='center',
                             fontsize=FS_TINY)
            if idx < final:
                # Arrow from this box's right edge to the next box's left edge.
                following = stages[idx + 1]
                pipeline_ax.annotate('', xy=(following[0] - following[2]/2, row_y),
                                     xytext=(centre + width/2, row_y),
                                     arrowprops=dict(arrowstyle='->', color=GRAY5, lw=1))

    # --- Panel 1: FC vs Conv 1x1 ---
    pipeline_ax.set_xlim(0, 20)
    pipeline_ax.set_ylim(0, 6)
    pipeline_ax.axis('off')
    pipeline_ax.set_title('FC (Fully Connected) vs Conv 1×1', fontsize=FS_TITLE,
                          fontweight='bold')

    # Upper row: classic CNN with FC layers.
    cnn_row = 4.5
    cnn_stages = [
        (1.5, 'Obraz\n224×224×3', 2.2, GRAY2),
        (4.5, 'Conv+Pool\n112×112×64', 1.8, GRAY2),
        (7.5, 'Conv+Pool\n7×7×512', 1.0, GRAY2),
        (10, 'Flatten\n25088', 0.5, ACCENT_LIGHT),
        (12, 'FC\n4096', 0.5, ACCENT_LIGHT),
        (14, 'FC\n1000', 0.3, ACCENT_LIGHT),
        (16, '"Kot"', 0.3, '#FFCDD2'),
    ]
    draw_pipeline(cnn_stages, cnn_row, 0.6, 1.2)
    pipeline_ax.text(0.3, cnn_row, 'CNN:', fontsize=FS, fontweight='bold',
                     color=RED_ACCENT, va='center')
    pipeline_ax.text(12, cnn_row + 1, 'PROBLEM: FC wymaga\nSTAŁEGO rozmiaru\n(np. 224×224)',
                     ha='center', fontsize=FS_SMALL, color=RED_ACCENT, fontweight='bold',
                     bbox=dict(boxstyle='round', facecolor='#FFCDD2',
                               edgecolor=RED_ACCENT, alpha=0.3))

    # Lower row: FCN with Conv 1x1.
    fcn_row = 1.5
    fcn_stages = [
        (1.5, 'Obraz\nH×W×3', 2.2, GRAY2),
        (4.5, 'Conv+Pool\nH/2 × W/2\n×64', 1.8, GRAY2),
        (7.5, 'Conv+Pool\nH/32 × W/32\n×512', 1.0, GRAY2),
        (10.5, 'Conv 1×1\nH/32 × W/32\n×C', 0.8, '#C8E6C9'),
        (13.5, 'Upsample\nH×W×C', 1.8, '#C8E6C9'),
        (16.5, 'Mapa\nsegmentacji', 1.5, '#C8E6C9'),
    ]
    draw_pipeline(fcn_stages, fcn_row, 0.7, 1.4)
    pipeline_ax.text(0.3, fcn_row, 'FCN:', fontsize=FS, fontweight='bold',
                     color=GREEN_ACCENT, va='center')
    pipeline_ax.text(10.5, fcn_row + 1.2,
                     'Conv 1×1:\nkażdy piksel\nosobno × wagi\n(jak FC ale\nzachowuje H×W)',
                     ha='center', fontsize=FS_TINY, color=GREEN_ACCENT,
                     bbox=dict(boxstyle='round', facecolor='#C8E6C9',
                               edgecolor=GREEN_ACCENT, alpha=0.3))

    # --- Panel 2: What FC and Conv do ---
    notes_ax.set_xlim(0, 20)
    notes_ax.set_ylim(0, 6)
    notes_ax.axis('off')
    notes_ax.set_title('Co robi warstwa FC? Co robi konwolucja?', fontsize=FS_TITLE,
                       fontweight='bold')

    # FC explanation
    fc_text = ('KAŻDY neuron połączony z KAŻDYM wejściem\n'
               '25 088 wejść × 4 096 neuronów = ~103 MLN wag!\n'
               'Traci informację GDZIE (przestrzenną)\n'
               'Wymaga STAŁEGO rozmiaru wejścia')
    notes_ax.add_patch(FancyBboxPatch((0.3, 3.2), 9, 2.5, boxstyle="round,pad=0.15",
                                      facecolor=ACCENT_LIGHT, edgecolor=ACCENT, linewidth=1))
    notes_ax.text(4.8, 5.2, 'Fully Connected (FC)', fontsize=FS, fontweight='bold',
                  ha='center')
    notes_ax.text(4.8, 4.5, fc_text, fontsize=FS_TINY, ha='center', va='top')

    # Conv explanation
    conv_text = ('Filtr (np. 3×3) „jedzie" po obrazie\n'
                 'Te same wagi dla KAŻDEJ pozycji\n'
                 'Zachowuje informację GDZIE\n'
                 'Akceptuje DOWOLNY rozmiar wejścia')
    notes_ax.add_patch(FancyBboxPatch((10.3, 3.2), 9, 2.5, boxstyle="round,pad=0.15",
                                      facecolor='#C8E6C9', edgecolor=GREEN_ACCENT,
                                      linewidth=1))
    notes_ax.text(14.8, 5.2, 'Konwolucja (Conv)', fontsize=FS, fontweight='bold',
                  ha='center')
    notes_ax.text(14.8, 4.5, conv_text, fontsize=FS_TINY, ha='center', va='top')

    # Conv 1x1 explanation
    conv1x1_text = ('Filtr 1×1: patrzy na JEDEN piksel, ale WSZYSTKIE kanały (512→C klas)\n'
                    'Działa jak FC ale zachowuje mapę H×W → każdy piksel osobno klasyfikowany\n'
                    'FCN: zamień FC na Conv1×1 → koniec z wymogiem stałego rozmiaru!')
    notes_ax.add_patch(FancyBboxPatch((3, 0.3), 14, 2.2, boxstyle="round,pad=0.15",
                                      facecolor=GRAY1, edgecolor=BLACK, linewidth=1))
    notes_ax.text(10, 2.1, 'Conv 1×1 = „FC per piksel"', fontsize=FS, fontweight='bold',
                  ha='center')
    notes_ax.text(10, 1.5, conv1x1_text, fontsize=FS_TINY, ha='center', va='top')

    plt.tight_layout()
    target = os.path.join(OUTPUT_DIR, 'q23_fc_vs_conv1x1.png')
    plt.savefig(target, dpi=DPI, bbox_inches='tight', facecolor='white')
    plt.close()
    print(" ✓ q23_fc_vs_conv1x1.png")
# ============================================================
# 8. U-NET ARCHITECTURE — Proper U-shaped diagram
# ============================================================
def generate_unet():
    """Draw the U-shaped U-Net architecture diagram and save it as a PNG.

    The figure contains an encoder column (left), a bottleneck block (bottom
    centre), a decoder column (right), dashed skip-connection arrows between
    encoder and decoder blocks drawn at the same height, and a side box that
    explains channel concatenation.  The result is written to
    ``q23_unet_arch.png`` inside ``OUTPUT_DIR`` and the figure is closed.
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    ax.set_xlim(-1, 21)
    ax.set_ylim(-1, 12)
    ax.axis('off')
    ax.set_title('U-Net: architektura w kształcie litery U', fontsize=FS_TITLE + 1, fontweight='bold')

    # Every layer is (centre_x, centre_y, width, height, label, channels).
    # Encoder blocks are listed from the top of the figure downwards.
    encoder_layers = [
        (2, 10, 2.5, 1.5, '572×572×1\n(wejście)', 64),
        (2, 7.5, 2.2, 1.3, '284×284\n×64', 64),
        (2, 5, 1.8, 1.1, '140×140\n×128', 128),
        (2, 2.5, 1.5, 1.0, '68×68\n×256', 256),
    ]
    bottleneck = (8, 0.5, 2.5, 1.2, '32×32×512\n(bottleneck)', 512)
    # Decoder blocks are listed from the bottom of the figure upwards.
    decoder_layers = [
        (14, 2.5, 1.5, 1.0, '68×68\n×256', 256),
        (14, 5, 1.8, 1.1, '140×140\n×128', 128),
        (14, 7.5, 2.2, 1.3, '284×284\n×64', 64),
        (14, 10, 2.5, 1.5, '572×572×C\n(mapa seg.)', 'C'),
    ]

    def draw_block(ax, x, y, w, h, label, color):
        """Draw a rounded box of size w×h centred on (x, y) with a centred label."""
        rect = FancyBboxPatch((x - w/2, y - h/2), w, h,
                              boxstyle="round,pad=0.05", facecolor=color,
                              edgecolor=BLACK, linewidth=1.2)
        ax.add_patch(rect)
        ax.text(x, y, label, ha='center', va='center', fontsize=FS_TINY)

    # Encoder column.
    for x, y, w, h, label, _channels in encoder_layers:
        draw_block(ax, x, y, w, h, label, ACCENT_LIGHT)

    # Downward arrows: bottom edge of each encoder block -> top edge of the next.
    for upper, lower in zip(encoder_layers, encoder_layers[1:]):
        x1, y1 = upper[0], upper[1] - upper[3]/2
        x2, y2 = lower[0], lower[1] + lower[3]/2
        ax.annotate('', xy=(x2, y2), xytext=(x1, y1),
                    arrowprops=dict(arrowstyle='->', color=ACCENT, lw=2))
        ax.text(x1 - 1.7, (y1 + y2) / 2, 'MaxPool\n2×2\n↓ zmniejsz', fontsize=FS_TINY,
                ha='center', color=ACCENT, fontweight='bold')

    # Bottleneck block, with one arrow in from the last encoder block and one
    # arrow out to the first decoder block.
    b_x, b_y, b_w, b_h, b_label, _ = bottleneck
    last_enc = encoder_layers[-1]
    first_dec = decoder_layers[0]
    draw_block(ax, b_x, b_y, b_w, b_h, b_label, GRAY2)
    ax.annotate('', xy=(b_x - b_w/2, b_y + b_h/2),
                xytext=(last_enc[0], last_enc[1] - last_enc[3]/2),
                arrowprops=dict(arrowstyle='->', color=ACCENT, lw=2))
    ax.annotate('', xy=(first_dec[0] - first_dec[2]/2,
                        first_dec[1] - first_dec[3]/2),
                xytext=(b_x + b_w/2, b_y + b_h/2),
                arrowprops=dict(arrowstyle='->', color=RED_ACCENT, lw=2))

    # Decoder column; the final output block (channels == 'C') gets a darker green.
    for x, y, w, h, label, channels in decoder_layers:
        color = '#C8E6C9' if channels != 'C' else '#A5D6A7'
        draw_block(ax, x, y, w, h, label, color)

    # Upward arrows: top edge of each decoder block -> bottom edge of the next.
    for lower, upper in zip(decoder_layers, decoder_layers[1:]):
        x1, y1 = lower[0], lower[1] + lower[3]/2
        x2, y2 = upper[0], upper[1] - upper[3]/2
        ax.annotate('', xy=(x2, y2), xytext=(x1, y1),
                    arrowprops=dict(arrowstyle='->', color=GREEN_ACCENT, lw=2))
        ax.text(x1 + 2, (y1 + y2) / 2, 'UpConv\n2×2\n↑ zwiększ', fontsize=FS_TINY,
                ha='center', color=GREEN_ACCENT, fontweight='bold')

    # Skip connections: the encoder list runs top-down and the decoder list
    # bottom-up, so reversing the decoder pairs blocks at the same height.
    for enc, dec in zip(encoder_layers, reversed(decoder_layers)):
        ax.annotate('', xy=(dec[0] - dec[2]/2, dec[1]),
                    xytext=(enc[0] + enc[2]/2, enc[1]),
                    arrowprops=dict(arrowstyle='->', color=GRAY5, lw=1.5,
                                    linestyle='dashed'))
        mid_x = (enc[0] + enc[2]/2 + dec[0] - dec[2]/2) / 2
        ax.text(mid_x, enc[1] + 0.6, 'skip\n(concat)', fontsize=FS_TINY,
                ha='center', color=GRAY5, fontweight='bold')

    # Column headings and the caption under the diagram.
    ax.text(0, 11.5, 'ENCODER\n(↓ zmniejsza)', fontsize=FS, fontweight='bold', color=ACCENT,
            ha='center')
    ax.text(17, 11.5, 'DECODER\n(↑ zwiększa)', fontsize=FS, fontweight='bold', color=GREEN_ACCENT,
            ha='center')
    ax.text(8, -0.8, 'Kształt litery „U": encoder schodzi ↓ → bottleneck na dnie → decoder wraca ↑',
            fontsize=FS_SMALL, ha='center', color=GRAY5, fontweight='bold')

    # Side box explaining what "concat" does to the channel count.
    rect = FancyBboxPatch((17.5, 3), 3, 5, boxstyle="round,pad=0.15",
                          facecolor=GRAY1, edgecolor=GRAY5, linewidth=1, linestyle='--')
    ax.add_patch(rect)
    ax.text(19, 7.5, 'Concatenation:', fontsize=FS_SMALL, ha='center', fontweight='bold')
    ax.text(19, 6.5, 'Encoder: 64 kanały\nDecoder: 64 kanały\n→ concat → 128 kanałów\n\n'
            'Jak sklejenie\ndwóch stosów\nkart:', fontsize=FS_TINY, ha='center')
    ax.text(19, 3.7, '[enc₁|enc₂|...|dec₁|dec₂|...]', fontsize=FS_TINY - 1, ha='center',
            fontweight='bold', color=ACCENT)

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_unet_arch.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print("  ✓ q23_unet_arch.png")
# ============================================================
# 9. RECEPTIVE FIELD — with dilation
# ============================================================
def generate_receptive_field():
    """Render the three-panel receptive-field figure and save it as a PNG.

    Panel 1 highlights the 3×3 footprint of a plain convolution on a 7×7
    grid, panel 2 highlights the nine taps of a dilated 3×3 convolution
    (rate=2) on the same grid and marks each tap with a dot, and panel 3 is a
    text-only explanation.  The figure is written to
    ``q23_receptive_field.png`` inside ``OUTPUT_DIR`` and then closed.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 4))

    def draw_grid(ax, size, highlight_cells, highlight_color, title, grid_offset=(0, 0)):
        """Draw a size×size grid; (row, col) cells in highlight_cells get highlight_color."""
        off_x, off_y = grid_offset
        for row in range(size):
            for col in range(size):
                fill = highlight_color if (row, col) in highlight_cells else WHITE
                # Row 0 is drawn at the top of the grid, hence the vertical flip.
                cell = patches.Rectangle((off_x + col, off_y + size - 1 - row), 1, 1,
                                         facecolor=fill, edgecolor=GRAY4, linewidth=0.5)
                ax.add_patch(cell)
        ax.set_title(title, fontsize=FS_TITLE, fontweight='bold')

    def prepare_grid_axes(ax):
        """Apply the limits and aspect shared by both 7×7 grid panels."""
        ax.set_xlim(-0.5, 7.5)
        ax.set_ylim(-1, 8)
        ax.set_aspect('equal')
        ax.axis('off')

    # --- Panel 1: footprint of an ordinary 3×3 convolution ---
    plain_ax = axes[0]
    prepare_grid_axes(plain_ax)
    highlight_3x3 = [(row, col) for row in (2, 3, 4) for col in (2, 3, 4)]
    draw_grid(plain_ax, 7, highlight_3x3, ACCENT_LIGHT, 'Zwykła conv 3×3')
    plain_ax.text(3.5, -0.5, 'RF = 3×3 pikseli', fontsize=FS, ha='center', fontweight='bold', color=ACCENT)

    # --- Panel 2: taps of a dilated 3×3 convolution (every second cell) ---
    dilated_ax = axes[1]
    prepare_grid_axes(dilated_ax)
    highlight_dilated = [(row, col) for row in (1, 3, 5) for col in (1, 3, 5)]
    draw_grid(dilated_ax, 7, highlight_dilated, '#FFCDD2', 'Dilated conv 3×3\n(rate=2)')
    dilated_ax.text(3.5, -0.5, 'RF = 5×5, ale 9 parametrów!', fontsize=FS, ha='center',
                    fontweight='bold', color=RED_ACCENT)
    # One dot at the centre of every highlighted cell; cell (row, col) is
    # drawn with its lower-left corner at (col, 6 - row).
    dots_x = [col + 0.5 for _, col in highlight_dilated]
    dots_y = [6.5 - row for row, _ in highlight_dilated]
    dilated_ax.scatter(dots_x, dots_y, c=RED_ACCENT, s=30, zorder=5)

    # --- Panel 3: textual explanation ---
    text_ax = axes[2]
    text_ax.set_xlim(0, 10)
    text_ax.set_ylim(0, 10)
    text_ax.axis('off')
    text_ax.set_title('Receptive Field\n(pole widzenia neuronu)', fontsize=FS_TITLE, fontweight='bold')

    # Each entry is (text, font size, colour, weight); an empty text is a
    # small vertical gap between paragraphs.
    lines = [
        ('RF = ile pikseli WEJŚCIOWYCH', FS, BLACK, 'bold'),
        ('wpływa na JEDEN piksel wyjścia', FS, BLACK, 'bold'),
        ('', 0, '', ''),
        ('Rate (współczynnik dylatacji):', FS, BLACK, 'bold'),
        (' rate=1: filtr „dotyka" sąsiadów', FS_SMALL, BLACK, 'normal'),
        (' rate=2: co drugi piksel → RF = 5×5', FS_SMALL, BLACK, 'normal'),
        (' rate=3: co trzeci → RF = 7×7', FS_SMALL, BLACK, 'normal'),
        (' WIĘCEJ kontekstu, TE SAME wagi!', FS_SMALL, GREEN_ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Dlaczego ważne w segmentacji?', FS, BLACK, 'bold'),
        (' Piksel sam nie wie czym jest.', FS_SMALL, BLACK, 'normal'),
        (' Potrzebuje KONTEKSTU (otoczenia).', FS_SMALL, BLACK, 'normal'),
        (' Większe RF → widzi obok budynki', FS_SMALL, BLACK, 'normal'),
        (' → wie, że TEN piksel to „droga"', FS_SMALL, GREEN_ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Global Average Pooling:', FS, BLACK, 'bold'),
        (' Mapa H×W×C → 1×1×C', FS_SMALL, BLACK, 'normal'),
        (' Średnia z CAŁEGO feature map', FS_SMALL, BLACK, 'normal'),
        (' RF = nieskończone (cały obraz)', FS_SMALL, GREEN_ACCENT, 'bold'),
    ]
    cursor_y = 8.5
    for text, font_size, text_color, weight in lines:
        if text:
            text_ax.text(0.5, cursor_y, text, fontsize=font_size, color=text_color,
                         fontweight=weight, va='top')
            cursor_y -= 0.45
        else:
            cursor_y -= 0.2

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_receptive_field.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print("  ✓ q23_receptive_field.png")
# ============================================================
# 10. TRANSFORMER / Self-attention / SOTA
# ============================================================
def generate_transformer():
    """Render the CNN-vs-Transformer figure (three panels) and save it as a PNG.

    Panel 1 shows an 8×8 grid where only the 3×3 neighbourhood around the
    query cell is highlighted (local view of a 3×3 filter), panel 2 shows the
    same grid fully highlighted (global view of self-attention), and panel 3
    is a text summary.  The figure is written to
    ``q23_transformer_attention.png`` inside ``OUTPUT_DIR`` and then closed.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 4))

    # Lower-left corner (data coordinates) of the query cell marked with "?".
    # Both grid panels use the same cell so they can be compared directly.
    query_x, query_y = 4, 4

    # --- Panel 1: CNN sees locally ---
    ax = axes[0]
    ax.set_xlim(-0.5, 8.5)
    ax.set_ylim(-1.5, 8.5)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title('CNN: widzi LOKALNIE', fontsize=FS_TITLE, fontweight='bold')
    for row in range(8):
        for col in range(8):
            # Row 0 is drawn at the top, so the cell's y coordinate is flipped.
            cell_x, cell_y = col, 7 - row
            # Highlight the 3×3 window centred on the query cell.  Testing in
            # data coordinates keeps the window centred on the "?" cell (a
            # fixed row range previously left it one cell too low).
            in_window = abs(cell_x - query_x) <= 1 and abs(cell_y - query_y) <= 1
            color = ACCENT_LIGHT if in_window else WHITE
            rect = patches.Rectangle((cell_x, cell_y), 1, 1,
                                     facecolor=color, edgecolor=GRAY3, linewidth=0.3)
            ax.add_patch(rect)
    # Query cell drawn on top of the grid.
    rect = patches.Rectangle((query_x, query_y), 1, 1, facecolor=RED_ACCENT, edgecolor=BLACK,
                             linewidth=1.5, alpha=0.7)
    ax.add_patch(rect)
    ax.text(query_x + 0.5, query_y + 0.5, '?', ha='center', va='center', fontsize=FS,
            fontweight='bold', color=WHITE)
    ax.text(4.5, -0.8, 'Filtr 3×3 widzi tylko\n9 sąsiednich pikseli', fontsize=FS_SMALL,
            ha='center', color=ACCENT)

    # --- Panel 2: Transformer sees globally (every cell highlighted) ---
    ax = axes[1]
    ax.set_xlim(-0.5, 8.5)
    ax.set_ylim(-1.5, 8.5)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title('Transformer: widzi GLOBALNIE', fontsize=FS_TITLE, fontweight='bold')
    for row in range(8):
        for col in range(8):
            rect = patches.Rectangle((col, 7 - row), 1, 1,
                                     facecolor='#FFCDD2', edgecolor=GRAY3, linewidth=0.3)
            ax.add_patch(rect)
    rect = patches.Rectangle((query_x, query_y), 1, 1, facecolor=RED_ACCENT, edgecolor=BLACK,
                             linewidth=1.5, alpha=0.9)
    ax.add_patch(rect)
    ax.text(query_x + 0.5, query_y + 0.5, '?', ha='center', va='center', fontsize=FS,
            fontweight='bold', color=WHITE)
    ax.text(4.5, -0.8, 'Self-attention „pyta"\nALL 64 piksele naraz', fontsize=FS_SMALL,
            ha='center', color=RED_ACCENT)

    # --- Panel 3: Transformer / self-attention / SOTA text summary ---
    ax = axes[2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Transformer & SOTA', fontsize=FS_TITLE, fontweight='bold')
    # Each entry is (text, font size, colour, weight); an empty text inserts
    # a small vertical gap instead of a line.
    lines = [
        ('Transformer:', FS, BLACK, 'bold'),
        (' Architektura z 2017 (Vaswani et al.)', FS_SMALL, BLACK, 'normal'),
        (' Oryginalnie do NLP (tłumaczenie)', FS_SMALL, BLACK, 'normal'),
        (' Kluczowy mechanizm: SELF-ATTENTION', FS_SMALL, ACCENT, 'bold'),
        ('', 0, '', ''),
        ('Self-attention w skrócie:', FS, BLACK, 'bold'),
        (' Każdy piksel tworzy trzy wektory:', FS_SMALL, BLACK, 'normal'),
        (' Q (Query — „czego szukam?")', FS_SMALL, ACCENT, 'normal'),
        (' K (Key — „co oferuję innych")', FS_SMALL, RED_ACCENT, 'normal'),
        (' V (Value — „moja wartość")', FS_SMALL, GREEN_ACCENT, 'normal'),
        (' Attention = softmax(Q·Kᵀ/√d)·V', FS_SMALL, BLACK, 'bold'),
        (' Koszt: O(n²) — n=liczba pikseli', FS_SMALL, RED_ACCENT, 'normal'),
        ('', 0, '', ''),
        ('SOTA = State Of The Art:', FS, BLACK, 'bold'),
        (' Najlepszy znany wynik na benchmarku', FS_SMALL, BLACK, 'normal'),
        (' Np. „mIoU 85.1% na ADE20K = SOTA"', FS_SMALL, BLACK, 'normal'),
        (' Ciągle się zmienia (nowy paper', FS_SMALL, GRAY5, 'normal'),
        (' → nowy SOTA)', FS_SMALL, GRAY5, 'normal'),
    ]
    y = 9.2
    for txt, size, color, weight in lines:
        if txt == '':
            y -= 0.15
            continue
        ax.text(0.3, y, txt, fontsize=size, color=color, fontweight=weight, va='top')
        y -= 0.45

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_transformer_attention.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print("  ✓ q23_transformer_attention.png")
# ============================================================
# 11. REGION GROWING — seed selection + BFS
# ============================================================
def generate_region_growing():
    """Render the three-panel region-growing figure and save it as a PNG.

    Panel 1 is a text comparison of manual and automatic seed selection,
    panel 2 shows a 6×6 grid of pixel values with the seed and the region
    cells coloured, and panel 3 shows the first BFS expansion waves around
    the seed together with a small legend.  The figure is written to
    ``q23_region_growing.png`` inside ``OUTPUT_DIR`` and then closed.
    """
    fig, axes = plt.subplots(1, 3, figsize=(11, 4.2))

    # --- Panel 1: manual vs automatic seed (text only) ---
    text_ax = axes[0]
    text_ax.set_xlim(0, 10)
    text_ax.set_ylim(0, 10)
    text_ax.axis('off')
    text_ax.set_title('Seed: ręcznie vs automatycznie', fontsize=FS_TITLE, fontweight='bold')
    # Each entry is (text, font size, colour, weight); empty text = small gap.
    lines = [
        ('Ręczny seed:', FS, ACCENT, 'bold'),
        (' Użytkownik klika na obraz', FS_SMALL, BLACK, 'normal'),
        (' → „tu jest obiekt, od tego zacznij"', FS_SMALL, BLACK, 'normal'),
        (' Użycie: segmentacja interaktywna', FS_SMALL, GRAY5, 'normal'),
        (' (np. Photoshop — magic wand tool)', FS_SMALL, GRAY5, 'normal'),
        ('', 0, '', ''),
        ('Automatyczny seed:', FS, RED_ACCENT, 'bold'),
        (' 1. Histogram → lokalne maxima', FS_SMALL, BLACK, 'normal'),
        (' (najczęstsza jasność → seed)', FS_SMALL, GRAY5, 'normal'),
        (' 2. Grid: siatka co N pikseli', FS_SMALL, BLACK, 'normal'),
        (' (np. seed co 50 px → 100 seedów)', FS_SMALL, GRAY5, 'normal'),
        (' 3. Losowe próbkowanie', FS_SMALL, BLACK, 'normal'),
        (' 4. Ekstrema lokalne gradientu', FS_SMALL, BLACK, 'normal'),
        ('', 0, '', ''),
        ('Dlaczego OR?', FS, GREEN_ACCENT, 'bold'),
        (' Ręczny → precyzyjny, ale wolny', FS_SMALL, BLACK, 'normal'),
        (' Auto → szybki, ale over-segmentation', FS_SMALL, BLACK, 'normal'),
    ]
    cursor_y = 9.2
    for text, font_size, text_color, weight in lines:
        if text:
            text_ax.text(0.3, cursor_y, text, fontsize=font_size, color=text_color,
                         fontweight=weight, va='top')
            cursor_y -= 0.45
        else:
            cursor_y -= 0.15

    # --- Panel 2: pixel grid with the seed and region cells coloured ---
    grid_ax = axes[1]
    grid_ax.set_xlim(-0.5, 6.5)
    grid_ax.set_ylim(-1.5, 7.5)
    grid_ax.set_aspect('equal')
    grid_ax.axis('off')
    grid_ax.set_title('Region Growing: krok po kroku', fontsize=FS_TITLE, fontweight='bold')
    pixel_grid = np.array([
        [150, 153, 148, 200, 210, 205],
        [147, 155, 152, 195, 208, 200],
        [145, 148, 160, 190, 195, 210],
        [200, 195, 190, 155, 148, 150],
        [210, 205, 200, 150, 152, 145],
        [215, 208, 195, 148, 147, 155],
    ])
    # Hard-coded membership mask (1 = drawn as part of the region).
    region_mask = np.array([
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ])
    seed_cell = (1, 1)  # (row, col) of the seed; its value in pixel_grid is 155
    for row in range(6):
        for col in range(6):
            value = pixel_grid[row, col]
            if (row, col) == seed_cell:
                fill = '#FFD54F'  # Seed
            elif region_mask[row, col] != 1:
                fill = WHITE
            elif value < 170:
                fill = ACCENT_LIGHT
            else:
                fill = GRAY2
            # Row 0 is drawn at the top of the grid.
            cell = patches.Rectangle((col, 5 - row), 1, 1,
                                     facecolor=fill, edgecolor=GRAY4, linewidth=0.5)
            grid_ax.add_patch(cell)
            grid_ax.text(col + 0.5, 5 - row + 0.5, str(value), ha='center', va='center',
                         fontsize=FS_TINY, fontweight='bold')
    # Arrow-labelled callout pointing at the centre of the seed cell.
    grid_ax.annotate('SEED\n(155)', xy=(1.5, 4.5), fontsize=FS_SMALL,
                     ha='center', color=RED_ACCENT, fontweight='bold',
                     arrowprops=dict(arrowstyle='->', color=RED_ACCENT), xytext=(-0.5, 7))
    grid_ax.text(3, -0.8, 'Próg = 20\nNiebieski = region (|val - seed| < 20)',
                 fontsize=FS_TINY, ha='center', color=ACCENT)

    # --- Panel 3: BFS expansion waves ---
    bfs_ax = axes[2]
    bfs_ax.set_xlim(-0.5, 6.5)
    bfs_ax.set_ylim(-1.5, 7.5)
    bfs_ax.set_aspect('equal')
    bfs_ax.axis('off')
    bfs_ax.set_title('Rosnący region (BFS)', fontsize=FS_TITLE, fontweight='bold')
    wave_colors = ['#FFD54F', '#FFF176', '#FFF9C4', ACCENT_LIGHT, '#B3D4FC']
    wave_labels = ['Seed', 'Fala 1', 'Fala 2', 'Fala 3', 'Fala 4']
    waves = [
        [(1, 1)],                          # seed
        [(0, 1), (1, 0), (1, 2), (2, 1)],  # wave 1
        [(0, 0), (0, 2), (2, 0), (2, 2)],  # wave 2
    ]
    # Map every wave cell to its colour; the waves do not share cells.
    cell_fill = {}
    for wave_index, wave in enumerate(waves):
        for cell_pos in wave:
            cell_fill[cell_pos] = wave_colors[wave_index]
    for row in range(6):
        for col in range(6):
            cell = patches.Rectangle((col, 5 - row), 1, 1,
                                     facecolor=cell_fill.get((row, col), WHITE),
                                     edgecolor=GRAY4, linewidth=0.5)
            bfs_ax.add_patch(cell)
    # Four short arrows from the seed centre towards its 4-neighbours.
    seed_x, seed_y = 1.5, 4.5
    for step_x, step_y in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
        bfs_ax.annotate('', xy=(seed_x + step_x * 0.7, seed_y + step_y * 0.7),
                        xytext=(seed_x, seed_y),
                        arrowprops=dict(arrowstyle='->', color=RED_ACCENT, lw=1.2))
    bfs_ax.text(3, -0.5, 'BFS: sprawdzaj sąsiadów,\ndodawaj podobne do kolejki',
                fontsize=FS_TINY, ha='center', color=GRAY5)
    # Legend covering only the waves that are actually drawn (first three).
    for slot, (swatch_color, swatch_label) in enumerate(zip(wave_colors[:3], wave_labels[:3])):
        swatch = patches.Rectangle((4, 6.5 - slot * 0.7), 0.5, 0.5,
                                   facecolor=swatch_color, edgecolor=GRAY4, linewidth=0.5)
        bfs_ax.add_patch(swatch)
        bfs_ax.text(4.8, 6.75 - slot * 0.7, swatch_label, fontsize=FS_TINY, va='center')

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_region_growing.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print("  ✓ q23_region_growing.png")
# ============================================================
# 12. DIY THRESHOLDING — Step-by-step example
# ============================================================
def generate_diy_thresholding():
    """Render the six-panel step-by-step thresholding figure and save it as a PNG.

    A synthetic 64×64 image (dark noisy disc on a bright noisy background) is
    shown together with its histogram, a fixed-threshold binarisation, the
    within-class variance curve over candidate thresholds, Otsu pseudocode
    and the binarisation at the threshold that minimises that variance.  The
    figure is written to ``q23_diy_thresholding.png`` inside ``OUTPUT_DIR``
    and then closed.
    """
    fig, axes = plt.subplots(2, 3, figsize=(11, 7))
    np.random.seed(42)  # fixed seed so the noise is identical on every run

    # Synthetic image: dark circle (value 60) on a bright background (200),
    # with Gaussian noise added and the result clipped to [0, 255].
    size = 64
    img = np.ones((size, size)) * 200
    yy, xx = np.mgrid[:size, :size]
    mask = ((xx - 32)**2 + (yy - 32)**2) < 15**2
    img[mask] = 60
    img += np.random.normal(0, 10, img.shape)
    img = np.clip(img, 0, 255)

    # --- Panel 1: original image ---
    ax = axes[0, 0]
    ax.imshow(img, cmap='gray', vmin=0, vmax=255)
    ax.set_title('Krok 1: obraz wejściowy', fontsize=FS, fontweight='bold')
    ax.axis('off')
    ax.text(32, -3, '64×64 pikseli, szare', fontsize=FS_TINY, ha='center')

    # --- Panel 2: histogram ---
    # NOTE: 128 is a hard-coded illustrative threshold; the threshold that
    # actually minimises the within-class variance is computed for panel 4.
    ax = axes[0, 1]
    counts, _, _ = ax.hist(img.ravel(), bins=50, color=GRAY3, edgecolor=GRAY5, linewidth=0.5)
    ax.axvline(x=128, color=RED_ACCENT, linewidth=2, linestyle='--', label='T=128 (Otsu)')
    ax.set_xlabel('Jasność', fontsize=FS_SMALL)
    ax.set_ylabel('Piksele', fontsize=FS_SMALL)
    ax.set_title('Krok 2: histogram\n(bimodalny!)', fontsize=FS, fontweight='bold')
    ax.legend(fontsize=FS_TINY)
    label_height = max(counts)*0.5
    ax.annotate('Garb 1\n(obiekt)', xy=(60, label_height), fontsize=FS_TINY, ha='center',
                color=ACCENT, fontweight='bold')
    ax.annotate('Garb 2\n(tło)', xy=(200, label_height), fontsize=FS_TINY, ha='center',
                color=RED_ACCENT, fontweight='bold')

    # --- Panel 3: fixed-threshold result ---
    ax = axes[0, 2]
    binary = (img > 128).astype(float)
    ax.imshow(binary, cmap='gray', vmin=0, vmax=1)
    ax.set_title('Krok 3: progowanie T=128', fontsize=FS, fontweight='bold')
    ax.axis('off')
    ax.text(32, -3, 'Biały = tło, Czarny = obiekt', fontsize=FS_TINY, ha='center')

    # --- Panel 4: within-class variance for every candidate threshold ---
    ax = axes[1, 0]
    thresholds = list(range(10, 245))
    n_pixels = img.size  # loop-invariant, so computed once
    variances = []
    for t in thresholds:
        # Boolean-mask indexing already yields flat 1-D arrays.
        below = img[img <= t]
        above = img[img > t]
        if len(below) == 0 or len(above) == 0:
            # One class is empty: the variance is undefined for this threshold.
            variances.append(np.nan)
            continue
        w0 = len(below) / n_pixels
        w1 = len(above) / n_pixels
        variances.append(w0 * np.var(below) + w1 * np.var(above))
    ax.plot(thresholds, variances, color=ACCENT, linewidth=1.5)
    # NaN entries (empty class) are ignored when locating the minimum.
    best_t = thresholds[np.nanargmin(variances)]
    ax.axvline(x=best_t, color=RED_ACCENT, linewidth=1.5, linestyle='--', label=f'Otsu T={best_t}')
    ax.scatter([best_t], [np.nanmin(variances)], c=RED_ACCENT, s=60, zorder=5)
    ax.set_xlabel('Próg T', fontsize=FS_SMALL)
    ax.set_ylabel('σ² wewnątrzklasowa', fontsize=FS_SMALL)
    ax.set_title('Krok 4: Otsu szuka min σ²', fontsize=FS, fontweight='bold')
    ax.legend(fontsize=FS_TINY)

    # --- Panel 5: pseudocode ---
    ax = axes[1, 1]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Pseudokod Otsu', fontsize=FS, fontweight='bold')
    code_lines = [
        'best_T = 0',
        'min_var = ∞',
        '',
        'for T in 0..255:',
        ' c0 = piksele z jasność ≤ T',
        ' c1 = piksele z jasność > T',
        ' w0 = len(c0) / len(all)',
        ' w1 = len(c1) / len(all)',
        ' var = w0·var(c0) + w1·var(c1)',
        ' if var < min_var:',
        ' min_var = var',
        ' best_T = T',
        '',
        'return best_T # optymalny próg',
    ]
    for i, line in enumerate(code_lines):
        # The lines that record or return the best threshold are emphasised.
        emphasised = 'best_T = T' in line or 'return' in line
        ax.text(0.5, 9.5 - i * 0.65, line, fontsize=FS_TINY, fontfamily='monospace',
                color=ACCENT if emphasised else BLACK,
                fontweight='bold' if emphasised else 'normal')

    # --- Panel 6: result at the computed threshold ---
    ax = axes[1, 2]
    binary_otsu = (img > best_t).astype(float)
    ax.imshow(binary_otsu, cmap='gray', vmin=0, vmax=1)
    ax.set_title(f'Krok 5: wynik Otsu (T={best_t})', fontsize=FS, fontweight='bold')
    ax.axis('off')
    ax.text(32, -3, 'Automatyczny próg!', fontsize=FS_TINY, ha='center',
            color=GREEN_ACCENT, fontweight='bold')

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_diy_thresholding.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print("  ✓ q23_diy_thresholding.png")
# ============================================================
# 13. DIY U-NET — Simplified step-by-step
# ============================================================
def generate_diy_unet():
    """Render the six-panel simplified U-Net walkthrough and save it as a PNG.

    Panels: synthetic RGB input, encoder stack, bottleneck description with
    four small random maps, decoder stack, the ground-truth class map used as
    the example output, and U-Net pseudocode.  The figure is written to
    ``q23_diy_unet.png`` inside ``OUTPUT_DIR`` and then closed.
    """
    # Public location of ListedColormap; avoids reaching it through the
    # undocumented ``plt.cm.colors`` attribute chain.
    from matplotlib.colors import ListedColormap

    fig, axes = plt.subplots(2, 3, figsize=(11, 7))
    np.random.seed(42)  # fixed seed so the random maps are reproducible
    size = 64

    # Synthetic image: bright background with a dark disc and a mid-grey disc.
    img = np.ones((size, size, 3), dtype=np.uint8) * 200
    yy, xx = np.mgrid[:size, :size]
    mask1 = ((xx - 20)**2 + (yy - 30)**2) < 12**2
    img[mask1] = [60, 60, 60]
    mask2 = ((xx - 45)**2 + (yy - 25)**2) < 8**2
    img[mask2] = [120, 120, 120]
    # Class map: 0 = background, 1 = dark disc, 2 = mid-grey disc.
    gt = np.zeros((size, size), dtype=np.uint8)
    gt[mask1] = 1
    gt[mask2] = 2

    def draw_stack(ax, layers, facecolor, edgecolor, step_label, label_suffix=''):
        """Draw a vertical stack of (spatial_size, channels) boxes, top to bottom.

        Box width is proportional to the spatial size.  Every box except the
        last gets ``label_suffix`` appended to its label, plus a short arrow
        and ``step_label`` underneath it.
        """
        y_pos = 8.5
        box_height = 0.8
        last_index = len(layers) - 1
        for index, (spatial, channels) in enumerate(layers):
            box_width = spatial / 64 * 4
            rect = FancyBboxPatch((5 - box_width/2, y_pos), box_width, box_height,
                                  boxstyle="round,pad=0.05", facecolor=facecolor,
                                  edgecolor=edgecolor, linewidth=1)
            ax.add_patch(rect)
            label = f'{spatial}×{spatial}×{channels}'
            if index < last_index:
                label += label_suffix
            ax.text(5, y_pos + box_height/2, label, ha='center', va='center',
                    fontsize=FS_SMALL, fontweight='bold')
            if index < last_index:
                ax.annotate('', xy=(5, y_pos - 0.3), xytext=(5, y_pos),
                            arrowprops=dict(arrowstyle='->', color=edgecolor, lw=1.5))
                ax.text(7, y_pos - 0.15, step_label, fontsize=FS_TINY, color=edgecolor)
            y_pos -= 2.2

    # --- Panel 1: input image ---
    ax = axes[0, 0]
    ax.imshow(img)
    ax.set_title('Krok 1: obraz RGB\n64×64×3', fontsize=FS, fontweight='bold')
    ax.axis('off')

    # --- Panel 2: encoder shrinks the spatial size ---
    ax = axes[0, 1]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Krok 2: Encoder ZMNIEJSZA', fontsize=FS, fontweight='bold')
    sizes = [(64, 3), (32, 64), (16, 128), (8, 256)]
    draw_stack(ax, sizes, ACCENT_LIGHT, ACCENT, 'Conv+Pool')
    ax.text(5, 0.3, 'Wyciąga cechy:\nkrawędzie → tekstury → obiekty',
            ha='center', fontsize=FS_TINY, color=GRAY5)

    # --- Panel 3: bottleneck ---
    ax = axes[0, 2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Krok 3: Bottleneck\n(najbardziej abstrakcyjne cechy)', fontsize=FS, fontweight='bold')
    # Four small random 4×4 maps placed in a 2×2 arrangement.  They live in
    # their own axes positioned in figure coordinates.
    # NOTE(review): axes created with fig.add_axes are presumably not
    # repositioned by tight_layout below — confirm the insets do not overlap
    # the text box in the rendered image.
    for k in range(4):
        small = np.random.rand(4, 4)
        ax_inset = fig.add_axes([0.68 + (k % 2) * 0.08, 0.72 - (k // 2) * 0.1, 0.06, 0.06])
        ax_inset.imshow(small, cmap='gray')
        ax_inset.axis('off')
    ax.text(5, 5, '8×8×256\n\nMałe mapy, ale DUŻO kanałów\nKażdy kanał = jedna „cecha"\n'
            '(np. kanał 42 = „wykrył koło"\n kanał 78 = „wykrył krawędź")\n\n'
            'Wie CO jest na obrazie\nale nie wie GDZIE dokładnie',
            ha='center', va='center', fontsize=FS_SMALL,
            bbox=dict(boxstyle='round', facecolor=GRAY1, edgecolor=GRAY3))

    # --- Panel 4: decoder enlarges the spatial size ---
    ax = axes[1, 0]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Krok 4: Decoder ZWIĘKSZA\n(+ skip connections!)', fontsize=FS, fontweight='bold')
    sizes_dec = [(8, 256), (16, 128), (32, 64), (64, 3)]
    draw_stack(ax, sizes_dec, '#C8E6C9', GREEN_ACCENT, 'UpConv+Concat', label_suffix=' + skip!')
    ax.text(5, 0.3, 'Odtwarza rozdzielczość:\nskip → przywraca krawędzie',
            ha='center', fontsize=FS_TINY, color=GRAY5)

    # --- Panel 5: output segmentation map (one colour per class) ---
    ax = axes[1, 1]
    cmap = ListedColormap([WHITE, ACCENT_LIGHT, '#FFCDD2'])
    ax.imshow(gt, cmap=cmap, interpolation='nearest')
    ax.set_title('Krok 5: mapa segmentacji\n64×64 (3 klasy)', fontsize=FS, fontweight='bold')
    ax.axis('off')
    ax.text(20, -3, 'Tło=0, obiekt A=1, obiekt B=2', fontsize=FS_TINY, ha='center')

    # --- Panel 6: pseudocode ---
    ax = axes[1, 2]
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 10)
    ax.axis('off')
    ax.set_title('Pseudokod U-Net', fontsize=FS, fontweight='bold')
    code_lines = [
        '# ENCODER',
        'e1 = conv_block(input, 64) # 64×64',
        'e2 = conv_block(pool(e1), 128) # 32×32',
        'e3 = conv_block(pool(e2), 256) # 16×16',
        '',
        '# BOTTLENECK',
        'b = conv_block(pool(e3), 512) # 8×8',
        '',
        '# DECODER + SKIP',
        'd3 = conv_block(concat(',
        ' upconv(b), e3), 256) # 16×16',
        'd2 = conv_block(concat(',
        ' upconv(d3), e2), 128) # 32×32',
        'd1 = conv_block(concat(',
        ' upconv(d2), e1), 64) # 64×64',
        '',
        'output = conv_1x1(d1, n_classes)',
    ]
    for i, line in enumerate(code_lines):
        # Skip-connection lines are accented; the final output line is green.
        if 'concat' in line:
            color = ACCENT
        elif 'output' in line:
            color = GREEN_ACCENT
        else:
            color = BLACK
        ax.text(0.3, 9.5 - i * 0.55, line, fontsize=FS_TINY, fontfamily='monospace',
                color=color)

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_diy_unet.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print("  ✓ q23_diy_unet.png")
# ============================================================
# 14. MNEMONICS — Visual mnemonic summary
# ============================================================
def generate_mnemonics():
    """Render the one-page mnemonic summary of segmentation methods as a PNG.

    Draws three groups of rounded "cards" (classic strategies, neural
    networks, metrics/loss) and a large box at the bottom with the overall
    mnemonic.  The figure is written to ``q23_mnemonics.png`` inside
    ``OUTPUT_DIR`` and then closed.
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.set_xlim(0, 20)
    ax.set_ylim(0, 16)
    ax.axis('off')
    ax.set_title('Mnemoniki — segmentacja obrazu', fontsize=FS_TITLE + 2, fontweight='bold')

    def draw_card(ax, x, y, w, h, title, mnemonic, color, detail=''):
        """Draw one card with its lower-left corner at (x, y).

        The title sits near the top edge, the mnemonic in the middle and the
        optional detail line near the bottom edge.
        """
        rect = FancyBboxPatch((x, y), w, h, boxstyle="round,pad=0.15",
                              facecolor=color, edgecolor=BLACK, linewidth=1)
        ax.add_patch(rect)
        ax.text(x + w/2, y + h - 0.3, title, ha='center', va='top',
                fontsize=FS, fontweight='bold')
        ax.text(x + w/2, y + h/2 - 0.1, mnemonic, ha='center', va='center',
                fontsize=FS_SMALL, fontstyle='italic', color=GRAY6)
        if detail:
            ax.text(x + w/2, y + 0.4, detail, ha='center', va='bottom',
                    fontsize=FS_TINY, color=GRAY5)

    # Card tuples are (x, y, w, h, title, mnemonic, color, detail), i.e. the
    # positional arguments of draw_card after ``ax``.

    # --- Classic strategies (left half) ---
    ax.text(5, 15.5, 'STRATEGIE KLASYCZNE', fontsize=FS_TITLE, fontweight='bold',
            color=ACCENT, ha='center')
    cards_classic = [
        (0.2, 12.5, 4.5, 2.5, 'Thresholding', '„PRÓG na bramce"\nPrzepuszcza > T,\nblokuje ≤ T',
         ACCENT_LIGHT, 'jasne=1, ciemne=0'),
        (5, 12.5, 4.5, 2.5, 'Otsu', '„AUTO-bramkarz"\nSam dobiera próg\nmin σ² wewnątrz',
         ACCENT_LIGHT, 'histogram bimodalny'),
        (0.2, 9.5, 4.5, 2.5, 'Region Growing', '„PLAMA rozlana"\nSeed → BFS po\npodobnych sąsiadach',
         ACCENT_LIGHT, 'jak atrament na papierze'),
        (5, 9.5, 4.5, 2.5, 'Watershed', '„ZALEWANIE terenu"\nDoliny=obiekty\nGranie=granice',
         ACCENT_LIGHT, 'woda + geography'),
        (0.2, 6.5, 4.5, 2.5, 'Mean Shift', '„KULKI toczą się"\nKażda → max gęstości\nBez K!',
         ACCENT_LIGHT, 'bandwidth = okno'),
        (5, 6.5, 4.5, 2.5, 'Normalized Cuts', '„CIĘCIE sznurków"\nGraf: tnij słabe\nkrawędzie (O(n³)!)',
         ACCENT_LIGHT, 'eigenvector problem'),
    ]
    for card in cards_classic:
        draw_card(ax, *card)

    # --- Neural networks (right half) ---
    ax.text(15, 15.5, 'SIECI NEURONOWE', fontsize=FS_TITLE, fontweight='bold',
            color=GREEN_ACCENT, ha='center')
    cards_nn = [
        (10.5, 12.5, 4.5, 2.5, 'FCN (2015)', '„FC → Conv 1×1"\nPierwsza end-to-end\nDowolny rozmiar',
         '#C8E6C9', 'skip connections'),
        (15.3, 12.5, 4.5, 2.5, 'U-Net (2015)', '„Litera U"\nEncoder↓ Decoder↑\nSkip = concat',
         '#C8E6C9', 'medycyna, małe dane'),
        (10.5, 9.5, 4.5, 2.5, 'DeepLab v3+', '„DZIURY w filtrze"\nAtrous conv (rate)\nASPP multi-scale',
         '#C8E6C9', 'à trous = z dziurami'),
        (15.3, 9.5, 4.5, 2.5, 'Transformer', '„WSZYSCY ze\nWSZYSTKIMI"\nSelf-attention O(n²)',
         '#C8E6C9', 'SegFormer, Mask2Former'),
    ]
    for card in cards_nn:
        draw_card(ax, *card)

    # --- Metrics and loss ---
    ax.text(10, 8.3, 'METRYKI I LOSS', fontsize=FS_TITLE, fontweight='bold',
            color=RED_ACCENT, ha='center')
    cards_metrics = [
        # IoU is intersection over union; the union operator was missing
        # from the rendered formula.
        (10.5, 6.5, 4.5, 1.6, 'mIoU', '„Nakładka / Suma"\nIoU = A∩B / A∪B',
         '#FFCDD2', ''),
        (15.3, 6.5, 4.5, 1.6, 'Dice / Focal', '„Dice=2·nakładka"\nFocal=trudne px',
         '#FFCDD2', ''),
    ]
    for card in cards_metrics:
        draw_card(ax, *card)

    # --- Master mnemonic box at the bottom ---
    rect = FancyBboxPatch((1, 0.3), 18, 5.5, boxstyle="round,pad=0.2",
                          facecolor=GRAY1, edgecolor=BLACK, linewidth=1.5)
    ax.add_patch(rect)
    ax.text(10, 5.3, 'SUPER-MNEMONIK: kolejność algorytmów segmentacji',
            ha='center', fontsize=FS, fontweight='bold')
    ax.text(10, 4.5, '„TORW-MN FUD-T"', ha='center', fontsize=FS_TITLE + 2,
            fontweight='bold', color=RED_ACCENT)
    ax.text(10, 3.5, 'Klasyczne: Thresholding → Otsu → Region growing → Watershed → Mean shift → Norm. cuts',
            ha='center', fontsize=FS_SMALL)
    ax.text(10, 2.8, 'Neuronowe: FCN → U-Net → DeepLab → Transformer',
            ha='center', fontsize=FS_SMALL)
    ax.text(10, 1.8, '„Turyści Oglądają Rzekę, Wodospad, Morze, Nurt — Fotografują Uroczy Dwór Tajemnic"',
            ha='center', fontsize=FS_SMALL, fontstyle='italic', color=ACCENT)
    ax.text(10, 1.0, 'Klasyczne: proste→auto→BFS→flood→gęstość→graf | Neuronowe: FC→U-skip→dilated→attention',
            ha='center', fontsize=FS_TINY, color=GRAY5)

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'q23_mnemonics.png'), dpi=DPI, bbox_inches='tight',
                facecolor='white')
    plt.close()
    print("  ✓ q23_mnemonics.png")
# ============================================================
# MAIN
# ============================================================
if __name__ == '__main__':
    # Script entry point: run every diagram generator in a fixed order.
    # Each generator writes its own PNG and prints a confirmation line.
    print("Generating PYTANIE 23 diagrams...")
    diagram_generators = (
        generate_otsu_bimodal,
        generate_watershed,
        generate_mean_shift,
        generate_normalized_cuts,
        generate_relu,
        generate_dot_product,
        generate_fcn,
        generate_unet,
        generate_receptive_field,
        generate_transformer,
        generate_region_growing,
        generate_diy_thresholding,
        generate_diy_unet,
        generate_mnemonics,
    )
    for render_diagram in diagram_generators:
        render_diagram()
    print(f"\nAll diagrams saved to: {OUTPUT_DIR}")