# Source: testsAndMisc/python_pkg/praca_magisterska_video/_q23_classical.py
# (446 lines, 15 KiB, Python)

"""Classical segmentation methods: concept, thresholding, region growing, watershed."""
from __future__ import annotations

from collections import deque

from moviepy import (
    CompositeVideoClip,
    VideoClip,
)
from moviepy.video.fx import FadeIn, FadeOut
import numpy as np

from python_pkg.praca_magisterska_video._q23_helpers import (
    BG_COLOR,
    FONT_B,
    FONT_R,
    FPS,
    STEP_DUR,
    H,
    W,
    _tc,
)
# ── Segmentation concept ─────────────────────────────────────────
def _segmentation_concept() -> list[CompositeVideoClip]:
    """Build the intro slide: an input image next to its per-pixel label map.

    A 4x8 grid of coloured tiles stands in for the input picture (left) and
    the same grid recoloured by class stands in for the segmentation map
    (right). The rendered frame does not depend on time; the captions are
    overlaid as separate text clips.

    Returns:
        A one-element list holding the composited slide with fade-in and
        fade-out effects applied.
    """
    tile = 40
    top_edge = 150
    input_left = 100
    mask_left = 600

    # Tile colours of the synthetic input picture.
    sky = (135, 206, 235)
    tree = (34, 139, 34)
    road = (128, 128, 128)
    car = (200, 50, 50)
    input_rows = [
        [sky] * 8,
        [sky] * 5 + [tree] * 3,
        [tree] * 3 + [road] * 3 + [tree] * 2,
        [road] * 3 + [car] * 2 + [road] * 3,
    ]

    # Class name of every tile and the colour each class gets in the mask.
    class_rows = [
        ["niebo"] * 8,
        ["niebo"] * 5 + ["drzewo"] * 3,
        ["drzewo"] * 3 + ["droga"] * 3 + ["drzewo"] * 2,
        ["droga"] * 3 + ["samochód"] * 2 + ["droga"] * 3,
    ]
    class_colors = {
        "niebo": (100, 180, 255),
        "drzewo": (50, 200, 50),
        "droga": (180, 180, 180),
        "samochód": (255, 80, 80),
    }
    mask_rows = [[class_colors[name] for name in names] for names in class_rows]

    def paint_tiles(canvas: np.ndarray, left_edge: int, rows: list) -> None:
        """Paint one grid of tiles; each tile is 2 px short to leave a gap."""
        for row_idx, row in enumerate(rows):
            y0 = top_edge + row_idx * tile
            for col_idx, rgb in enumerate(row):
                x0 = left_edge + col_idx * tile
                canvas[y0 : y0 + tile - 2, x0 : x0 + tile - 2] = rgb

    def make_image_frame(_t: float) -> np.ndarray:
        """Render the (time-independent) frame with both grids."""
        canvas = np.zeros((H, W, 3), dtype=np.uint8)
        canvas[:] = BG_COLOR
        paint_tiles(canvas, input_left, input_rows)
        paint_tiles(canvas, mask_left, mask_rows)
        return canvas

    background = VideoClip(make_image_frame, duration=STEP_DUR).with_fps(FPS)

    # (text, font size, colour, font, position)
    # NOTE(review): the third caption is an empty string placed between the
    # two grids - presumably a glyph (e.g. an arrow) was lost; confirm against
    # the rendered slide.
    captions = [
        ("Obraz wejściowy", 22, "white", FONT_B, (170, 100)),
        ("Mapa segmentacji", 22, "white", FONT_B, (660, 100)),
        ("", 50, "#FFE082", FONT_B, (450, 250)),
        ("Każdy piksel → etykieta klasy", 20, "#B0BEC5", FONT_R, (100, 420)),
        ("niebo | drzewo | droga | samochód", 18, "#90CAF9", FONT_R, (600, 420)),
        ("Segmentacja = klasyfikacja per-piksel", 24, "#FFE082", FONT_B, (100, 500)),
        (
            "Semantic: klasy bez instancji | Instance: "
            "rozróżnia obiekty | Panoptic: oba",
            16,
            "#78909C",
            FONT_R,
            (100, 560),
        ),
    ]
    layers: list[VideoClip] = [background]
    layers.extend(
        _tc(text=text, font_size=size, color=rgb, font=face)
        .with_duration(STEP_DUR)
        .with_position(where)
        for text, size, rgb, face, where in captions
    )

    slide = CompositeVideoClip(layers, size=(W, H)).with_effects(
        [FadeIn(0.3), FadeOut(0.3)]
    )
    return [slide]
# ── Thresholding / Otsu ───────────────────────────────────────────
def _thresholding_demo() -> list[CompositeVideoClip]:
    """Build the thresholding slide: a bimodal histogram split by a moving line.

    The histogram is synthetic: the sum of two Gaussian humps centred at
    brightness 60 and 190. The threshold sweeps linearly from 60 to 190
    during the first 70% of the slide and then holds. Bars below the
    threshold are drawn blue, bars at or above it red-orange, and the
    threshold itself is a red vertical line.

    Returns:
        A one-element list holding the composited slide with fade-in and
        fade-out effects applied.
    """
    slides = []

    bar_start_x = 80
    bar_y = 500  # baseline (bottom edge) of every bar
    bar_w = 4
    below_color = (70, 130, 200)
    above_color = (200, 100, 80)
    line_color = (255, 80, 80)

    # The histogram never changes, so the bar heights are computed once here
    # instead of being re-evaluated for every frame.
    bar_heights = [
        int(
            200 * np.exp(-((level - 60) ** 2) / (2 * 20**2))
            + 150 * np.exp(-((level - 190) ** 2) / (2 * 25**2))
        )
        for level in range(256)
    ]

    def make_threshold_frame(t: float) -> np.ndarray:
        """Render the histogram with the threshold position for time ``t``."""
        frame = np.zeros((H, W, 3), dtype=np.uint8)
        frame[:] = BG_COLOR

        threshold = int(60 + (190 - 60) * min(t / (STEP_DUR * 0.7), 1.0))

        # One pass over the bars, coloured by the side of the threshold they
        # fall on. Each bar is 1 px narrower than its slot to leave a gap.
        for level, bar_h in enumerate(bar_heights):
            x = bar_start_x + level * bar_w
            if bar_h > 0 and x + bar_w < W:
                color = below_color if level < threshold else above_color
                frame[bar_y - bar_h : bar_y, x : x + bar_w - 1] = color

        # Draw the threshold marker last: it shares its columns with the bar
        # at index ``threshold``, so drawing it earlier lets that bar cover
        # most of the line.
        tx = bar_start_x + threshold * bar_w
        if tx < W:
            frame[bar_y - 250 : bar_y + 10, tx : tx + 3] = line_color
        return frame

    hist_clip = VideoClip(make_threshold_frame, duration=STEP_DUR).with_fps(FPS)
    text_clips: list[VideoClip] = [hist_clip]

    # (text, font size, colour, font, position)
    # NOTE(review): the hump captions call the dark hump the object and the
    # bright hump the background, while the last caption maps "<= T" to
    # background and "> T" to object - confirm which convention is intended.
    labels = [
        ("Progowanie (Thresholding) z metodą Otsu", 28, "#FFE082", FONT_B, (80, 30)),
        (
            "Histogram jasności pikseli — dwumodalny (bimodal)",
            20,
            "#B0BEC5",
            FONT_R,
            (80, 80),
        ),
        ("Garb 1: piksele obiektu (ciemne ~60)", 16, "#64B5F6", FONT_R, (80, 120)),
        ("Garb 2: piksele tła (jasne ~190)", 16, "#EF9A9A", FONT_R, (80, 150)),
        (
            "Próg T (czerwona linia) dzieli piksele na 2 klasy",
            18,
            "white",
            FONT_R,
            (80, 540),
        ),
        (
            "Otsu: automatycznie testuje T=0..255, minimalizuje σ² wewnątrzklasową",
            16,
            "#A5D6A7",
            FONT_R,
            (80, 580),
        ),
        (
            "Piksel ≤ T → klasa 0 (tło) | Piksel > T → klasa 1 (obiekt)",
            16,
            "#78909C",
            FONT_R,
            (80, 620),
        ),
    ]
    for text, fs, color, font, pos in labels:
        tc = (
            _tc(text=text, font_size=fs, color=color, font=font)
            .with_duration(STEP_DUR)
            .with_position(pos)
        )
        text_clips.append(tc)

    slides.append(
        CompositeVideoClip(text_clips, size=(W, H)).with_effects(
            [FadeIn(0.3), FadeOut(0.3)]
        )
    )
    return slides
# ── Region Growing ────────────────────────────────────────────────
def _region_growing_demo() -> list[CompositeVideoClip]:
    """Build the region-growing slide: a BFS flood spreading from a seed cell.

    A 10x10 synthetic grayscale grid (dark background, bright rectangle,
    seeded random noise) is segmented once up front with a 4-connected
    breadth-first search: a neighbour joins the region when its brightness
    differs from the seed's by less than ``threshold_val``. The animation
    then reveals the cells in the order the search visited them, finishing
    at 80% of the slide duration.

    Returns:
        A one-element list holding the composited slide with fade-in and
        fade-out effects applied.
    """
    slides = []
    grid_size = 10
    cell_size = 40
    rng = np.random.default_rng(42)  # fixed seed keeps the slide reproducible

    # Synthetic image: dark background (60) with a bright rectangle (180).
    grid = np.zeros((grid_size, grid_size), dtype=np.uint8)
    grid[:] = 60
    grid[2:7, 3:8] = 180
    # Perturb every cell with integer noise drawn from [-15, 15).
    noise = rng.integers(-15, 15, (grid_size, grid_size))
    grid = np.clip(grid.astype(int) + noise, 0, 255).astype(np.uint8)

    # Breadth-first search from the seed, comparing against the seed's value.
    seed = (4, 5)
    threshold_val = 50
    seed_value = int(grid[seed])
    visited_order: list[tuple[int, int]] = []
    queue = deque([seed])
    visited_set = {seed}
    while queue:
        r, c = queue.popleft()
        visited_order.append((r, c))
        for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nr, nc = r + dr, c + dc
            if (
                0 <= nr < grid_size
                and 0 <= nc < grid_size
                and (nr, nc) not in visited_set
                and abs(int(grid[nr, nc]) - seed_value) < threshold_val
            ):
                visited_set.add((nr, nc))
                queue.append((nr, nc))

    # Position of each region cell in the visiting order, so a frame can tell
    # in O(1) whether a cell has been revealed yet.
    visit_rank = {cell: rank for rank, cell in enumerate(visited_order)}

    def make_region_frame(t: float) -> np.ndarray:
        """Render the grid with the cells revealed up to time ``t``."""
        frame = np.zeros((H, W, 3), dtype=np.uint8)
        frame[:] = BG_COLOR
        ox, oy = 100, 180
        # Number of region cells revealed so far.
        progress = min(t / (STEP_DUR * 0.8), 1.0)
        n_visited = int(progress * len(visited_order))
        for r in range(grid_size):
            for c in range(grid_size):
                x = ox + c * cell_size
                y = oy + r * cell_size
                val = grid[r, c]
                color = (val, val, val)
                rank = visit_rank.get((r, c))
                if rank is not None and rank < n_visited:
                    color = (80, 200, 120)  # green: already in the region
                elif (r, c) == seed:
                    color = (255, 200, 50)  # yellow: seed before it is revealed
                frame[y : y + cell_size - 2, x : x + cell_size - 2] = color

        # Outline the seed cell with a 2 px yellow border. The vertical
        # position is derived from ``oy`` like every other cell.
        sx = ox + seed[1] * cell_size
        sy = oy + seed[0] * cell_size
        frame[sy : sy + cell_size, sx : sx + 2] = (255, 200, 50)
        frame[sy : sy + cell_size, sx + cell_size - 2 : sx + cell_size] = (255, 200, 50)
        frame[sy : sy + 2, sx : sx + cell_size] = (255, 200, 50)
        frame[sy + cell_size - 2 : sy + cell_size, sx : sx + cell_size] = (255, 200, 50)
        return frame

    region_clip = VideoClip(make_region_frame, duration=STEP_DUR).with_fps(FPS)
    text_clips: list[VideoClip] = [region_clip]

    # (text, font size, colour, font, position)
    labels = [
        ("Region Growing — rozrastanie regionu", 28, "#FFE082", FONT_B, (100, 30)),
        ("Seed (ziarno) → BFS do podobnych sąsiadów", 20, "#B0BEC5", FONT_R, (100, 80)),
        (
            "Żółty = seed | Zielony = region | Szary = nieodwiedzone",
            16,
            "#78909C",
            FONT_R,
            (100, 120),
        ),
        (
            "Sąsiad PODOBNY (|jasność - jasność_regionu| < próg) → dodaj do regionu",
            16,
            "#A5D6A7",
            FONT_R,
            (100, 600),
        ),
        (
            "Algorytm zatrzymuje się gdy brak podobnych sąsiadów",
            16,
            "#90CAF9",
            FONT_R,
            (100, 640),
        ),
        (
            "Mnemonik: PLAMA atramentu — rozlewa się na podobne piksele",
            18,
            "#EF9A9A",
            FONT_R,
            (100, 670),
        ),
    ]
    for text, fs, color, font, pos in labels:
        tc = (
            _tc(text=text, font_size=fs, color=color, font=font)
            .with_duration(STEP_DUR)
            .with_position(pos)
        )
        text_clips.append(tc)

    slides.append(
        CompositeVideoClip(text_clips, size=(W, H)).with_effects(
            [FadeIn(0.3), FadeOut(0.3)]
        )
    )
    return slides
# ── Watershed ─────────────────────────────────────────────────────
def _watershed_demo() -> list[CompositeVideoClip]:
    """Build the watershed slide: water rising over a 1-D terrain profile.

    The terrain is a cross-section with two Gaussian-shaped valleys (around
    25% and 75% of the width) on an otherwise flat plateau. The water level
    rises linearly during the first 70% of the slide and then holds. Water is
    painted between the water line and the terrain surface wherever the
    surface lies below the water line. A red dam marker is drawn at the
    horizontal midpoint once the level exceeds ``dam_visible_threshold``.

    Returns:
        A one-element list holding the composited slide with fade-in and
        fade-out effects applied.
    """
    ground_rgb = (100, 80, 60)
    water_rgb = (70, 130, 220)
    dam_rgb = (255, 80, 80)

    left, base_y = 100, 450  # left edge and baseline row of the profile
    span = 900
    n_samples = 100

    # The profile does not depend on time, so it is built once.
    u = np.linspace(0, 1, n_samples)
    dips = (
        120 * np.exp(-((u - 0.25) ** 2) / 0.005)
        + 80 * np.exp(-((u - 0.75) ** 2) / 0.008)
        + 30
    )
    heights = 250 - dips  # inverted so the Gaussian bumps become valleys

    def make_watershed_frame(t: float) -> np.ndarray:
        """Render terrain, water and dam marker for time ``t``."""
        canvas = np.zeros((H, W, 3), dtype=np.uint8)
        canvas[:] = BG_COLOR

        rise = min(t / (STEP_DUR * 0.7), 1.0)
        level = int(160 + 80 * rise)
        surface_y = base_y - level  # image row of the water line

        # Segments are painted left to right, ground first and water second,
        # because neighbouring segments share their boundary column.
        for k in range(n_samples - 1):
            col_a = left + int(u[k] * span)
            col_b = min(left + int(u[k + 1] * span) + 1, W)
            ground_a = base_y - int(heights[k])
            ground_b = base_y - int(heights[k + 1])

            # Ground: from 5 px above the higher endpoint down to the baseline.
            canvas[min(ground_a, ground_b) - 5 : base_y, col_a:col_b] = ground_rgb

            # Water: only where the water line is above the segment's left
            # endpoint; it fills the rows between the two.
            if surface_y < ground_a:
                wet_top = max(surface_y, 0)
                wet_bottom = min(ground_a, base_y)
                if wet_top < wet_bottom:
                    canvas[wet_top:wet_bottom, col_a:col_b] = water_rgb

        # Dam marker: a 4 px wide red bar at the midpoint of the profile.
        ridge_x = left + int(0.5 * span)
        dam_visible_threshold = 160
        if level > dam_visible_threshold:
            canvas[base_y - level : base_y - 140, ridge_x - 2 : ridge_x + 2] = dam_rgb
        return canvas

    background = VideoClip(make_watershed_frame, duration=STEP_DUR).with_fps(FPS)

    # (text, font size, colour, font, position)
    captions = [
        ("Watershed — metoda zlewiska", 28, "#FFE082", FONT_B, (100, 20)),
        (
            "Obraz = mapa topograficzna (jasność = wysokość)",
            20,
            "#B0BEC5",
            FONT_R,
            (100, 65),
        ),
        (
            "Brązowy = teren (ciemne=doliny, jasne=szczyty)",
            16,
            "#8D6E63",
            FONT_R,
            (100, 100),
        ),
        ("Niebieski = woda zalewająca od minimów", 16, "#64B5F6", FONT_R, (100, 130)),
        (
            "Czerwony = TAMA (granica segmentu) — gdy woda z 2 dolin się spotka",
            16,
            "#EF9A9A",
            FONT_R,
            (100, 160),
        ),
        (
            "Problem: over-segmentation "
            "(za dużo regionów). "
            "Rozwiązanie: marker-controlled.",
            16,
            "#A5D6A7",
            FONT_R,
            (100, 560),
        ),
        (
            "Mnemonik: ZALEWANIE terenu — granie gór = granice segmentów",
            18,
            "#FFE082",
            FONT_R,
            (100, 600),
        ),
    ]
    layers: list[VideoClip] = [background]
    layers.extend(
        _tc(text=text, font_size=size, color=rgb, font=face)
        .with_duration(STEP_DUR)
        .with_position(where)
        for text, size, rgb, face, where in captions
    )

    slide = CompositeVideoClip(layers, size=(W, H)).with_effects(
        [FadeIn(0.3), FadeOut(0.3)]
    )
    return [slide]