mirror of
https://github.com/kuhyx/testsAndMisc-archive.git
synced 2026-07-04 13:23:01 +02:00
Split 16+ files. 27 files still need splitting. See session notes.
333 lines
11 KiB
Python
333 lines
11 KiB
Python
"""Classical detection methods: detection concept, HOG+SVM, Viola-Jones."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from _q24_common import (
|
|
BG_COLOR,
|
|
FONT_B,
|
|
FONT_R,
|
|
FPS,
|
|
STEP_DUR,
|
|
H,
|
|
W,
|
|
_tc,
|
|
)
|
|
from moviepy import CompositeVideoClip, VideoClip
|
|
from moviepy.video.fx import FadeIn, FadeOut
|
|
import numpy as np
|
|
|
|
|
|
# ── Detection concept ────────────────────────────────────────────
|
|
def _detection_concept() -> list[CompositeVideoClip]:
    """Build the intro slide showing what an object detector outputs.

    The background frame contains a mock scene: a dark "sky" area with three
    coloured rectangles standing in for a car, a person and a tree.  Each
    rectangle is wrapped in a bounding-box outline, and three panels on the
    right side back the classification / detection / segmentation captions.
    All captions are overlaid as text clips created with ``_tc``.

    Returns:
        A one-element list with the composed slide.  The slide lasts
        ``STEP_DUR`` seconds and has a 0.3 s fade-in and fade-out.
    """
    sky_fill = (40, 50, 70)
    panel_fill = (35, 45, 65)
    # Row spans of the three comparison panels (all share columns 800:1150).
    panel_rows = ((180, 260), (290, 370), (400, 480))
    # Every bounding box is the object's fill rectangle grown by this margin.
    bbox_pad = 2
    # (row_start, row_stop, col_start, col_stop, fill colour, outline colour)
    scene_objects = (
        (350, 430, 150, 320, (180, 60, 60), (255, 80, 80)),  # "car"
        (280, 440, 450, 520, (60, 120, 180), (80, 180, 255)),  # "person"
        (200, 400, 580, 650, (40, 130, 50), (80, 220, 100)),  # "tree"
    )

    def draw_outline(
        frame: np.ndarray,
        top: int,
        bottom: int,
        left: int,
        right: int,
        color: tuple[int, int, int],
    ) -> None:
        """Paint a rectangular outline on ``frame`` in place.

        Three concentric rings are drawn, each one pixel further out than
        the previous one and each two pixels wide, so the rings overlap
        into a single solid border.
        """
        for grow in range(3):
            y0, y1 = top - grow, bottom + grow
            x0, x1 = left - grow, right + grow
            frame[y0:y1, x0 : x0 + 2] = color  # left edge
            frame[y0:y1, x1 - 2 : x1] = color  # right edge
            frame[y0 : y0 + 2, x0:x1] = color  # top edge
            frame[y1 - 2 : y1, x0:x1] = color  # bottom edge

    def make_det_frame(_t: float) -> np.ndarray:
        """Return the background frame; it does not depend on time."""
        frame = np.zeros((H, W, 3), dtype=np.uint8)
        frame[:] = BG_COLOR

        # Scene backdrop, then the objects, then their outlines on top.
        frame[140:500, 100:700] = sky_fill
        for r0, r1, c0, c1, fill, _edge in scene_objects:
            frame[r0:r1, c0:c1] = fill
        for r0, r1, c0, c1, _fill, edge in scene_objects:
            draw_outline(
                frame,
                r0 - bbox_pad,
                r1 + bbox_pad,
                c0 - bbox_pad,
                c1 + bbox_pad,
                edge,
            )

        # Panels behind the classification / detection / segmentation text.
        for r0, r1 in panel_rows:
            frame[r0:r1, 800:1150] = panel_fill

        return frame

    det_clip = VideoClip(make_det_frame, duration=STEP_DUR).with_fps(FPS)

    # (text, font size, colour, font, (x, y) position)
    labels = [
        ("Detekcja obiektów — co to jest?", 28, "#FFE082", FONT_B, (100, 20)),
        ("Wynik: (klasa, bounding box, pewność)", 20, "#B0BEC5", FONT_R, (100, 65)),
        ("samochód 95%", 14, "#EF9A9A", FONT_B, (150, 340)),
        ("osoba 88%", 14, "#64B5F6", FONT_B, (450, 268)),
        ("drzewo 72%", 14, "#A5D6A7", FONT_B, (580, 188)),
        ("Klasyfikacja: cały obraz → 1 etykieta", 15, "#78909C", FONT_R, (810, 210)),
        ("Detekcja: bbox + klasa + pewność", 15, "#FFE082", FONT_R, (810, 320)),
        ("Segmentacja: maska per piksel", 15, "#78909C", FONT_R, (810, 430)),
        ("← granulacja rośnie →", 14, "#90CAF9", FONT_R, (810, 520)),
    ]

    # The drawn frame goes first so every caption is layered above it.
    text_clips: list[VideoClip] = [det_clip]
    text_clips.extend(
        _tc(text=text, font_size=fs, color=color, font=font)
        .with_duration(STEP_DUR)
        .with_position(pos)
        for text, fs, color, font, pos in labels
    )

    slide = CompositeVideoClip(text_clips, size=(W, H)).with_effects(
        [FadeIn(0.3), FadeOut(0.3)]
    )
    return [slide]
|
|
|
|
|
|
# ── HOG + SVM pipeline ───────────────────────────────────────────
|
|
def _hog_svm_demo() -> list[CompositeVideoClip]:
    """Build the slide that animates the HOG + SVM detection pipeline.

    The background frame reveals six pipeline stage boxes from left to
    right as time advances and, later in the clip, a three-pixel greyscale
    strip used as a gradient example.  Explanatory captions are overlaid as
    text clips created with ``_tc`` and stay visible for the whole slide.

    Returns:
        A one-element list with the composed slide.  The slide lasts
        ``STEP_DUR`` seconds and has a 0.3 s fade-in and fade-out.
    """
    slides = []

    def make_hog_frame(t: float) -> np.ndarray:
        """Render the pipeline background for time ``t`` (seconds)."""
        frame = np.zeros((H, W, 3), dtype=np.uint8)
        frame[:] = BG_COLOR

        # Animation progress in [0, 1]; it saturates at 80% of the slide
        # duration so the finished picture is held for the remaining time.
        progress = min(t / (STEP_DUR * 0.8), 1.0)

        # Pipeline stages as boxes with arrows
        # Each entry: (name, (x, y) of top-left corner, (width, height), fill).
        # The name is not drawn by this function (it is unpacked as `_label`).
        stages = [
            ("Gradient", (80, 250), (130, 80), (100, 160, 220)),
            ("Orientacja", (260, 250), (130, 80), (80, 180, 140)),
            ("Komórki 8x8", (440, 250), (130, 80), (200, 160, 80)),
            ("Bloki 2x2", (620, 250), (130, 80), (200, 120, 60)),
            ("Normalizacja", (800, 250), (130, 80), (180, 100, 80)),
            ("SVM", (980, 250), (130, 80), (220, 80, 80)),
        ]

        # The "+ 1" keeps the first stage visible already at t == 0.
        n_active = int(progress * len(stages)) + 1

        for i, (_label, (sx, sy), (sw, sh), color) in enumerate(stages):
            if i < n_active:
                frame[sy : sy + sh, sx : sx + sw] = color
                # Border
                # Two-pixel top and bottom edges in a brightened fill colour
                # (+60 per channel, capped at 255).
                frame[sy : sy + 2, sx : sx + sw] = tuple(
                    min(c + 60, 255) for c in color
                )
                frame[sy + sh - 2 : sy + sh, sx : sx + sw] = tuple(
                    min(c + 60, 255) for c in color
                )

                # Arrow to next
                # NOTE(review): this block is assumed to be nested inside the
                # `if i < n_active` branch (arrows appear together with their
                # stage); the nesting could not be read from the flattened
                # listing -- confirm against the upstream file.
                if i < len(stages) - 1:
                    # Horizontal bar, 3 px high and 20 px long, starting 5 px
                    # to the right of the box at its vertical centre.
                    ax = sx + sw + 5
                    ay = sy + sh // 2
                    frame[ay - 1 : ay + 2, ax : ax + 20] = (150, 150, 170)

        # Show gradient computation example at bottom
        gradient_phase = 0.2
        if progress > gradient_phase:
            # Mini pixel grid showing gradient computation
            # Three 40x40 grey squares spaced 50 px apart, with intensities
            # matching the "[50][50][200]" caption below.
            gx, gy = 100, 430
            pixels = [50, 50, 200]
            for idx, val in enumerate(pixels):
                x = gx + idx * 50
                frame[gy : gy + 40, x : x + 40] = (val, val, val)

        return frame

    hog_clip = VideoClip(make_hog_frame, duration=STEP_DUR).with_fps(FPS)

    # The drawn frame goes first so every caption is layered above it.
    text_clips: list[VideoClip] = [hog_clip]
    # Each entry: (text, font size, colour, font, (x, y) position).
    labels = [
        ("HOG + SVM — pipeline detekcji pieszych", 28, "#FFE082", FONT_B, (80, 20)),
        (
            "Mnemonik: GOKBN = Gradienty→Orientacja→Komórki→Bloki→Normalizacja",
            16,
            "#A5D6A7",
            FONT_R,
            (80, 65),
        ),
        ("Gradient: siła i kierunek zmiany jasności", 14, "#64B5F6", FONT_R, (80, 95)),
        (
            "Histogram: 9 binów (0°-180°, co 20°) per komórka 8x8",
            14,
            "#78909C",
            FONT_R,
            (80, 120),
        ),
        (
            "[50][50][200] → Gx = 200-50 = 150 = silna krawędź!",
            16,
            "#EF9A9A",
            FONT_R,
            (80, 490),
        ),
        (
            "Wektor HOG (3780 cech) → SVM: pieszy (+1) / tło (-1)",
            16,
            "white",
            FONT_R,
            (80, 540),
        ),
        (
            "Sliding window 64x128 przesuwa się po obrazie → NMS → wynik",
            16,
            "#90CAF9",
            FONT_R,
            (80, 580),
        ),
        (
            "SVM = LINIA MAKSYMALNEGO ODDECHU (max margines, support vectors)",
            16,
            "#FFE082",
            FONT_R,
            (80, 620),
        ),
    ]
    for text, fs, color, font, pos in labels:
        tc = (
            _tc(text=text, font_size=fs, color=color, font=font)
            .with_duration(STEP_DUR)
            .with_position(pos)
        )
        text_clips.append(tc)

    slides.append(
        CompositeVideoClip(text_clips, size=(W, H)).with_effects(
            [FadeIn(0.3), FadeOut(0.3)]
        )
    )
    return slides
|
|
|
|
|
|
# ── Viola-Jones ───────────────────────────────────────────────────
|
|
def _viola_jones_demo() -> list[CompositeVideoClip]:
    """Build the slide that animates the Viola-Jones classifier cascade.

    The background frame draws the cascade as a funnel: five horizontal
    bars, each narrower than the one above, revealed top to bottom as time
    advances.  Bars are linked by short downward connectors, and every bar
    after the first gets a red "rejected" marker on its left.  Captions are
    overlaid as text clips created with ``_tc`` and stay visible for the
    whole slide.

    Returns:
        A one-element list with the composed slide.  The slide lasts
        ``STEP_DUR`` seconds and has a 0.3 s fade-in and fade-out.
    """
    n_stages = 5
    top_width = 1000  # width of the first (widest) bar, in pixels
    bar_height = 60
    connector_color = (150, 150, 170)
    reject_color = (200, 80, 80)

    def make_cascade_frame(t: float) -> np.ndarray:
        """Render the cascade funnel for time ``t`` (seconds)."""
        frame = np.zeros((H, W, 3), dtype=np.uint8)
        frame[:] = BG_COLOR

        # Animation progress in [0, 1]; it saturates at 80% of the slide
        # duration so the finished funnel is held for the remaining time.
        progress = min(t / (STEP_DUR * 0.8), 1.0)
        x_center = W // 2

        for i in range(n_stages):
            # Stage i only appears once the animation has moved past it;
            # every later stage is hidden as well, so stop drawing here.
            if progress * n_stages - i <= 0:
                break

            # Each stage is 18% of the top width narrower than the previous.
            width = int(top_width * (1 - i * 0.18))
            y = 150 + i * 100
            x1 = x_center - width // 2
            x2 = x1 + width

            # Stage bar: the fill gets slightly lighter with every stage.
            frame[y : y + bar_height, x1:x2] = (
                50 + i * 10,
                60 + i * 10,
                80 + i * 10,
            )
            # Two-pixel top and bottom borders.
            border = (100 + i * 20, 130 + i * 15, 200)
            frame[y : y + 2, x1:x2] = border
            frame[y + bar_height - 2 : y + bar_height, x1:x2] = border

            # Vertical connector down to the next stage.
            if i < n_stages - 1:
                frame[
                    y + bar_height + 5 : y + bar_height + 25,
                    x_center - 1 : x_center + 2,
                ] = connector_color

            # Red "rejected" marker left of every stage except the first.
            if i > 0:
                rx = x1 - 30
                ry = y + bar_height // 2
                frame[ry - 1 : ry + 2, rx : rx + 25] = reject_color

        return frame

    cascade_clip = VideoClip(make_cascade_frame, duration=STEP_DUR).with_fps(FPS)

    # (text, font size, colour, font, (x, y) position)
    labels = [
        (
            "Viola-Jones — kaskada klasyfikatorów (2001)",
            28,
            "#FFE082",
            FONT_B,
            (80, 20),
        ),
        (
            "3 innowacje: HIC = Haar + Integral Image + Cascade",
            20,
            "#B0BEC5",
            FONT_R,
            (80, 65),
        ),
        ("Etap 1: 2 cechy Haar", 14, "#64B5F6", FONT_R, (170, 170)),
        ("Etap 2: 10 cech", 14, "#64B5F6", FONT_R, (210, 270)),
        ("Etap 3: 25 cech", 14, "#64B5F6", FONT_R, (240, 370)),
        ("Etap 4: 50 cech", 14, "#64B5F6", FONT_R, (260, 470)),
        ("→ TWARZ!", 16, "#A5D6A7", FONT_B, (590, 560)),
        (
            "SITO: 99% okien odpada w pierwszych 3 etapach → REAL-TIME!",
            16,
            "#EF9A9A",
            FONT_R,
            (80, 620),
        ),
        (
            "Haar: kontrast jasna/ciemna | Integral Image: "
            "suma prostokąta O(1) = 4 odczyty",
            14,
            "#78909C",
            FONT_R,
            (80, 655),
        ),
        ("odrzucone →", 12, "#EF9A9A", FONT_R, (60, 275)),
        ("odrzucone →", 12, "#EF9A9A", FONT_R, (60, 375)),
    ]

    # The drawn frame goes first so every caption is layered above it.
    text_clips: list[VideoClip] = [cascade_clip]
    text_clips.extend(
        _tc(text=text, font_size=fs, color=color, font=font)
        .with_duration(STEP_DUR)
        .with_position(pos)
        for text, fs, color, font, pos in labels
    )

    slide = CompositeVideoClip(text_clips, size=(W, H)).with_effects(
        [FadeIn(0.3), FadeOut(0.3)]
    )
    return [slide]
|