mirror of
https://github.com/kuhyx/testsAndMisc-archive.git
synced 2026-07-04 15:23:06 +02:00
446 lines
15 KiB
Python
446 lines
15 KiB
Python
|
|
"""Classical segmentation methods: concept, thresholding, region growing, watershed."""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
from moviepy import (
|
||
|
|
CompositeVideoClip,
|
||
|
|
VideoClip,
|
||
|
|
)
|
||
|
|
from moviepy.video.fx import FadeIn, FadeOut
|
||
|
|
import numpy as np
|
||
|
|
|
||
|
|
from python_pkg.praca_magisterska_video._q23_helpers import (
|
||
|
|
BG_COLOR,
|
||
|
|
FONT_B,
|
||
|
|
FONT_R,
|
||
|
|
FPS,
|
||
|
|
STEP_DUR,
|
||
|
|
H,
|
||
|
|
W,
|
||
|
|
_tc,
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
# ── Segmentation concept ─────────────────────────────────────────
|
||
|
|
def _segmentation_concept() -> list[CompositeVideoClip]:
    """Build the intro slide that presents segmentation as per-pixel labeling.

    A synthetic 4x8 "photo" of flat-colored tiles is painted on the left and
    the corresponding class-colored label map on the right; text captions are
    layered on top of that static picture.

    Returns:
        A one-element list holding the composed slide with a 0.3 s fade-in
        and fade-out applied.
    """
    photo_left, grid_top = 100, 150  # top-left corner of the input-image grid
    mask_left = 600  # left edge of the segmentation-map grid
    pitch = 40  # distance between tile origins; tiles are 2 px smaller (gap)

    sky = (135, 206, 235)
    tree = (34, 139, 34)
    road = (128, 128, 128)
    car = (200, 50, 50)
    photo_rows = [
        [sky] * 8,  # sky
        [sky] * 5 + [tree] * 3,  # sky + tree
        [tree] * 3 + [road] * 3 + [tree] * 2,  # tree + road + tree
        [road] * 3 + [car] * 2 + [road] * 3,  # road + car + road
    ]

    class_rows = [
        ["niebo"] * 8,
        ["niebo"] * 5 + ["drzewo"] * 3,
        ["drzewo"] * 3 + ["droga"] * 3 + ["drzewo"] * 2,
        ["droga"] * 3 + ["samochód"] * 2 + ["droga"] * 3,
    ]
    class_palette = {
        "niebo": (100, 180, 255),
        "drzewo": (50, 200, 50),
        "droga": (180, 180, 180),
        "samochód": (255, 80, 80),
    }
    # Resolve class names to display colors once; both grids are then painted
    # by the same routine.
    mask_rows = [[class_palette[name] for name in row] for row in class_rows]

    def paint_tiles(
        canvas: np.ndarray,
        left: int,
        rows: list[list[tuple[int, int, int]]],
    ) -> None:
        """Paint ``rows`` of RGB tiles onto ``canvas`` starting at x=``left``."""
        for row_idx, row in enumerate(rows):
            top = grid_top + row_idx * pitch
            for col_idx, rgb in enumerate(row):
                x0 = left + col_idx * pitch
                canvas[top : top + pitch - 2, x0 : x0 + pitch - 2] = rgb

    def make_image_frame(_t: float) -> np.ndarray:
        """Return the (time-independent) picture with both tile grids."""
        canvas = np.zeros((H, W, 3), dtype=np.uint8)
        canvas[:] = BG_COLOR
        paint_tiles(canvas, photo_left, photo_rows)
        paint_tiles(canvas, mask_left, mask_rows)
        return canvas

    # (text, font size, color, font, (x, y) position)
    captions = [
        ("Obraz wejściowy", 22, "white", FONT_B, (170, 100)),
        ("Mapa segmentacji", 22, "white", FONT_B, (660, 100)),
        ("→", 50, "#FFE082", FONT_B, (450, 250)),
        ("Każdy piksel → etykieta klasy", 20, "#B0BEC5", FONT_R, (100, 420)),
        ("niebo | drzewo | droga | samochód", 18, "#90CAF9", FONT_R, (600, 420)),
        ("Segmentacja = klasyfikacja per-piksel", 24, "#FFE082", FONT_B, (100, 500)),
        (
            "Semantic: klasy bez instancji | Instance: "
            "rozróżnia obiekty | Panoptic: oba",
            16,
            "#78909C",
            FONT_R,
            (100, 560),
        ),
    ]

    layers: list[VideoClip] = [
        VideoClip(make_image_frame, duration=STEP_DUR).with_fps(FPS)
    ]
    layers.extend(
        _tc(text=caption, font_size=size, color=tint, font=face)
        .with_duration(STEP_DUR)
        .with_position(xy)
        for caption, size, tint, face, xy in captions
    )

    slide = CompositeVideoClip(layers, size=(W, H)).with_effects(
        [FadeIn(0.3), FadeOut(0.3)]
    )
    return [slide]
|
||
|
|
|
||
|
|
|
||
|
|
# ── Thresholding / Otsu ───────────────────────────────────────────
|
||
|
|
def _thresholding_demo() -> list[CompositeVideoClip]:
    """Build the thresholding / Otsu slide with an animated threshold line.

    A synthetic bimodal histogram (Gaussian humps centred on gray levels 60
    and 190) is drawn as 256 bars. A red vertical line sweeps from level 60 to
    level 190 during the first 70 % of the slide; bars left of the line are
    painted blue, bars at or right of it reddish.

    Returns:
        A one-element list holding the composed slide with a 0.3 s fade-in
        and fade-out applied.
    """
    bar_start_x = 80  # x of the bar for gray level 0
    bar_y = 500  # baseline (bottom edge) of the histogram
    bar_w = 4  # horizontal pitch per gray level; bars are 1 px narrower
    low_peak, high_peak = 60, 190  # centres of the two humps

    # The histogram does not depend on time, so it is computed once here
    # instead of on every rendered frame.
    levels = np.arange(256)
    profile = 200 * np.exp(-((levels - low_peak) ** 2) / (2 * 20**2)) + 150 * np.exp(
        -((levels - high_peak) ** 2) / (2 * 25**2)
    )
    heights = profile.astype(int)  # truncation, same as int() for values >= 0
    lefts = bar_start_x + levels * bar_w

    # Only bars that fit inside the frame and have a non-zero height are ever
    # painted; store them as plain ints: (gray level, left x, height).
    drawable = [
        (level, left, height)
        for level, left, height in zip(
            levels.tolist(), lefts.tolist(), heights.tolist()
        )
        if left + bar_w < W and height > 0
    ]

    below_color = (70, 130, 200)  # bars with level < threshold
    above_color = (200, 100, 80)  # bars with level >= threshold

    def make_threshold_frame(t: float) -> np.ndarray:
        """Render the histogram with the threshold position for time ``t``."""
        frame = np.zeros((H, W, 3), dtype=np.uint8)
        frame[:] = BG_COLOR

        # The threshold reaches the second hump at 70 % of the slide duration
        # and then stays there.
        progress = min(t / (STEP_DUR * 0.7), 1.0)
        threshold = int(low_peak + (high_peak - low_peak) * progress)

        # Threshold marker. It is drawn before the bars (as in the original
        # rendering order), so the bar at the threshold level overlaps its
        # lower part.
        tx = bar_start_x + threshold * bar_w
        if tx < W:
            frame[bar_y - 250 : bar_y + 10, tx : tx + 3] = (255, 80, 80)

        # Single pass over the bars, colored by which side of T they lie on.
        for level, left, height in drawable:
            tint = below_color if level < threshold else above_color
            frame[bar_y - height : bar_y, left : left + bar_w - 1] = tint

        return frame

    hist_clip = VideoClip(make_threshold_frame, duration=STEP_DUR).with_fps(FPS)
    text_clips: list[VideoClip] = [hist_clip]

    # NOTE(review): the hump captions call the dark hump the object and the
    # bright hump the background, while the rule caption maps "≤ T" to
    # background and "> T" to object. The texts are kept verbatim; confirm
    # which convention is intended.
    # (text, font size, color, font, (x, y) position)
    labels = [
        ("Progowanie (Thresholding) z metodą Otsu", 28, "#FFE082", FONT_B, (80, 30)),
        (
            "Histogram jasności pikseli — dwumodalny (bimodal)",
            20,
            "#B0BEC5",
            FONT_R,
            (80, 80),
        ),
        ("Garb 1: piksele obiektu (ciemne ~60)", 16, "#64B5F6", FONT_R, (80, 120)),
        ("Garb 2: piksele tła (jasne ~190)", 16, "#EF9A9A", FONT_R, (80, 150)),
        (
            "Próg T (czerwona linia) dzieli piksele na 2 klasy",
            18,
            "white",
            FONT_R,
            (80, 540),
        ),
        (
            "Otsu: automatycznie testuje T=0..255, minimalizuje σ² wewnątrzklasową",
            16,
            "#A5D6A7",
            FONT_R,
            (80, 580),
        ),
        (
            "Piksel ≤ T → klasa 0 (tło) | Piksel > T → klasa 1 (obiekt)",
            16,
            "#78909C",
            FONT_R,
            (80, 620),
        ),
    ]
    for text, fs, color, font, pos in labels:
        tc = (
            _tc(text=text, font_size=fs, color=color, font=font)
            .with_duration(STEP_DUR)
            .with_position(pos)
        )
        text_clips.append(tc)

    slide = CompositeVideoClip(text_clips, size=(W, H)).with_effects(
        [FadeIn(0.3), FadeOut(0.3)]
    )
    return [slide]
|
||
|
|
|
||
|
|
|
||
|
|
# ── Region Growing ────────────────────────────────────────────────
|
||
|
|
def _region_growing_demo() -> list[CompositeVideoClip]:
    """Build the region-growing slide: a BFS flood from a seed pixel.

    A 10x10 gray-level grid (dark background around 60, bright rectangle
    around 180 in rows 2-6 / columns 3-7, plus integer noise in [-15, 15)) is
    generated with a fixed RNG seed. A 4-connected breadth-first search from
    the seed cell collects every reachable cell whose value differs from the
    *seed's* value by less than the threshold. The animation reveals the
    cells in BFS order over the first 80 % of the slide.

    Returns:
        A one-element list holding the composed slide with a 0.3 s fade-in
        and fade-out applied.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    grid_size = 10
    cell_size = 40
    rng = np.random.default_rng(42)

    # Create a simple grid: dark background with a bright rectangle.
    grid = np.zeros((grid_size, grid_size), dtype=np.uint8)
    grid[:] = 60  # dark background
    grid[2:7, 3:8] = 180  # bright rectangle

    # Add some noise (computed in int to avoid uint8 wrap-around, then clipped).
    noise = rng.integers(-15, 15, (grid_size, grid_size))
    grid = np.clip(grid.astype(int) + noise, 0, 255).astype(np.uint8)

    # BFS from the seed (row 4, column 5).
    seed = (4, 5)
    threshold_val = 50
    seed_val = int(grid[seed])  # similarity is always measured against the seed

    visited_order: list[tuple[int, int]] = []
    frontier = deque([seed])  # deque gives O(1) pops from the front
    seen = {seed}
    while frontier:
        r, c = frontier.popleft()
        visited_order.append((r, c))
        for dr, dc in ((-1, 0), (1, 0), (0, -1), (0, 1)):
            nr, nc = r + dr, c + dc
            if not (0 <= nr < grid_size and 0 <= nc < grid_size):
                continue
            if (nr, nc) in seen:
                continue
            if abs(int(grid[nr, nc]) - seed_val) < threshold_val:
                seen.add((nr, nc))
                frontier.append((nr, nc))

    # Position of every region cell in the BFS order. A cell is "already
    # visited" in a frame when its rank is below that frame's visit count;
    # this replaces slicing and scanning ``visited_order`` for each cell.
    visit_rank = {cell: rank for rank, cell in enumerate(visited_order)}

    ox, oy = 100, 180  # top-left corner of the grid on the frame
    region_color = (80, 200, 120)  # green for region
    seed_color = (255, 200, 50)  # yellow seed

    def make_region_frame(t: float) -> np.ndarray:
        """Render the grid with the cells visited up to time ``t`` highlighted."""
        frame = np.zeros((H, W, 3), dtype=np.uint8)
        frame[:] = BG_COLOR

        # How many cells to show as visited.
        progress = min(t / (STEP_DUR * 0.8), 1.0)
        n_visited = int(progress * len(visited_order))

        for r in range(grid_size):
            y = oy + r * cell_size
            for c in range(grid_size):
                x = ox + c * cell_size
                rank = visit_rank.get((r, c))
                if rank is not None and rank < n_visited:
                    color = region_color
                elif (r, c) == seed:
                    # Only reached before the first cell is revealed, because
                    # the seed has rank 0.
                    color = seed_color
                else:
                    val = grid[r, c]
                    color = (val, val, val)
                frame[y : y + cell_size - 2, x : x + cell_size - 2] = color

        # Mark the seed with a bright 2 px border. The Y coordinate is derived
        # from the grid's own Y origin (previously ``ox + ... + 80``, which
        # only matched because 100 + 80 == 180).
        sx = ox + seed[1] * cell_size
        sy = oy + seed[0] * cell_size
        frame[sy : sy + cell_size, sx : sx + 2] = seed_color
        frame[sy : sy + cell_size, sx + cell_size - 2 : sx + cell_size] = seed_color
        frame[sy : sy + 2, sx : sx + cell_size] = seed_color
        frame[sy + cell_size - 2 : sy + cell_size, sx : sx + cell_size] = seed_color

        return frame

    region_clip = VideoClip(make_region_frame, duration=STEP_DUR).with_fps(FPS)
    text_clips: list[VideoClip] = [region_clip]

    # (text, font size, color, font, (x, y) position)
    labels = [
        ("Region Growing — rozrastanie regionu", 28, "#FFE082", FONT_B, (100, 30)),
        ("Seed (ziarno) → BFS do podobnych sąsiadów", 20, "#B0BEC5", FONT_R, (100, 80)),
        (
            "Żółty = seed | Zielony = region | Szary = nieodwiedzone",
            16,
            "#78909C",
            FONT_R,
            (100, 120),
        ),
        (
            "Sąsiad PODOBNY (|jasność - jasność_regionu| < próg) → dodaj do regionu",
            16,
            "#A5D6A7",
            FONT_R,
            (100, 600),
        ),
        (
            "Algorytm zatrzymuje się gdy brak podobnych sąsiadów",
            16,
            "#90CAF9",
            FONT_R,
            (100, 640),
        ),
        (
            "Mnemonik: PLAMA atramentu — rozlewa się na podobne piksele",
            18,
            "#EF9A9A",
            FONT_R,
            (100, 670),
        ),
    ]
    for text, fs, color, font, pos in labels:
        tc = (
            _tc(text=text, font_size=fs, color=color, font=font)
            .with_duration(STEP_DUR)
            .with_position(pos)
        )
        text_clips.append(tc)

    slide = CompositeVideoClip(text_clips, size=(W, H)).with_effects(
        [FadeIn(0.3), FadeOut(0.3)]
    )
    return [slide]
|
||
|
|
|
||
|
|
|
||
|
|
# ── Watershed ─────────────────────────────────────────────────────
|
||
|
|
def _watershed_demo() -> list[CompositeVideoClip]:
    """Build the watershed slide: water rising over a 1-D terrain profile.

    The terrain is a cross-section with two valleys (at 25 % and 75 % of the
    width) separated by a plateau. The water level rises from 160 to 240
    during the first 70 % of the slide; once it exceeds 160 a red "dam"
    marker is drawn at the middle of the profile.

    Returns:
        A one-element list holding the composed slide with a 0.3 s fade-in
        and fade-out applied.
    """
    ox, oy = 100, 450  # left edge and baseline (height 0) of the profile
    terrain_w = 900
    terrain_points = 100

    # The terrain never changes, so the profile and the per-segment pixel
    # geometry are computed once here rather than on every frame.
    xs = np.linspace(0, 1, terrain_points)
    bumps = (
        120 * np.exp(-((xs - 0.25) ** 2) / 0.005)
        + 80 * np.exp(-((xs - 0.75) ** 2) / 0.008)
        + 30
    )
    terrain = 250 - bumps  # invert for visual (bumps become valleys)

    col_x = (ox + (xs * terrain_w).astype(int)).tolist()
    surface_y = (oy - terrain.astype(int)).tolist()  # screen y of the surface

    # One entry per pair of neighbouring samples:
    # (first column, one-past-last column, top row of ground fill, surface row
    # at the left sample). Neighbouring segments share their boundary column.
    segments = [
        (x1, min(x2 + 1, W), min(y1, y2) - 5, y1)
        for x1, x2, y1, y2 in zip(
            col_x[:-1], col_x[1:], surface_y[:-1], surface_y[1:]
        )
    ]

    ridge_x = ox + int(0.5 * terrain_w)
    dam_visible_threshold = 160
    ground_color = (100, 80, 60)
    water_color = (70, 130, 220)

    def make_watershed_frame(t: float) -> np.ndarray:
        """Render terrain, water and dam marker for time ``t``."""
        frame = np.zeros((H, W, 3), dtype=np.uint8)
        frame[:] = BG_COLOR

        # Water level rises over time.
        water_level = int(160 + 80 * min(t / (STEP_DUR * 0.7), 1.0))
        water_y = oy - water_level
        fill_top = max(water_y, 0)

        # Segments are painted left to right, ground first and then water,
        # which matches the original per-column paint order on the shared
        # boundary columns.
        for x_start, x_stop, ground_top, left_y in segments:
            frame[ground_top:oy, x_start:x_stop] = ground_color

            # Water occupies the rows between the water surface and the
            # terrain surface wherever the water is higher than the ground.
            fill_bot = min(left_y, oy)
            if water_y < left_y and fill_top < fill_bot:
                frame[fill_top:fill_bot, x_start:x_stop] = water_color

        # Dam marker at the middle of the profile.
        if water_level > dam_visible_threshold:
            frame[oy - water_level : oy - 140, ridge_x - 2 : ridge_x + 2] = (
                255,
                80,
                80,
            )

        return frame

    ws_clip = VideoClip(make_watershed_frame, duration=STEP_DUR).with_fps(FPS)
    text_clips: list[VideoClip] = [ws_clip]

    # (text, font size, color, font, (x, y) position)
    labels = [
        ("Watershed — metoda zlewiska", 28, "#FFE082", FONT_B, (100, 20)),
        (
            "Obraz = mapa topograficzna (jasność = wysokość)",
            20,
            "#B0BEC5",
            FONT_R,
            (100, 65),
        ),
        (
            "Brązowy = teren (ciemne=doliny, jasne=szczyty)",
            16,
            "#8D6E63",
            FONT_R,
            (100, 100),
        ),
        ("Niebieski = woda zalewająca od minimów", 16, "#64B5F6", FONT_R, (100, 130)),
        (
            "Czerwony = TAMA (granica segmentu) — gdy woda z 2 dolin się spotka",
            16,
            "#EF9A9A",
            FONT_R,
            (100, 160),
        ),
        (
            "Problem: over-segmentation "
            "(za dużo regionów). "
            "Rozwiązanie: marker-controlled.",
            16,
            "#A5D6A7",
            FONT_R,
            (100, 560),
        ),
        (
            "Mnemonik: ZALEWANIE terenu — granie gór = granice segmentów",
            18,
            "#FFE082",
            FONT_R,
            (100, 600),
        ),
    ]
    for text, fs, color, font, pos in labels:
        tc = (
            _tc(text=text, font_size=fs, color=color, font=font)
            .with_duration(STEP_DUR)
            .with_position(pos)
        )
        text_clips.append(tc)

    slide = CompositeVideoClip(text_clips, size=(W, H)).with_effects(
        [FadeIn(0.3), FadeOut(0.3)]
    )
    return [slide]
|