testsAndMisc-archive/python_pkg/praca_magisterska_video/_q23_deeplab.py

"""DeepLab architecture animations for Q23 segmentation video."""

from __future__ import annotations

from moviepy import (
    CompositeVideoClip,
    VideoClip,
)
import numpy as np

from python_pkg.praca_magisterska_video._q23_helpers import (
    BG_COLOR,
    FONT_B,
    FONT_R,
    FPS,
    STEP_DUR,
    H,
    W,
    _compose_slide,
)


# ── DeepLab Architecture ─────────────────────────────────────────
def _make_dilated_frame(t: float) -> np.ndarray:
    """Draw one frame comparing 3x3 kernels at dilation rates 1, 2 and 3.

    Three 7x7 grids of cells are laid out side by side.  Inside each grid
    the nine kernel taps are highlighted; their spacing equals the dilation
    rate, so the highlighted pattern spans 3, 5 and 7 cells respectively.
    Grids are revealed left to right as the animation advances.

    Args:
        t: Clip time handed to the frame callback.  Animation progress is
            ``t / (STEP_DUR * 0.7)`` clamped to 1.0.

    Returns:
        A ``(H, W, 3)`` ``uint8`` image.
    """
    canvas = np.zeros((H, W, 3), dtype=np.uint8)
    canvas[:] = BG_COLOR
    reveal = min(t / (STEP_DUR * 0.7), 1.0)

    pitch = 36  # distance between neighbouring cell origins
    side = pitch - 2  # painted cell size; leaves a 2px gutter
    top = 180
    cells_per_side = 7
    kernel_taps = 3
    # (dilation rate, x of the grid's left edge)
    layout = ((1, 60), (2, 420), (3, 820))

    for index, (rate, left) in enumerate(layout):
        # Grid number `index` appears once progress reaches index * 0.3;
        # later grids have higher thresholds, so stop at the first hidden one.
        if reveal < index * 0.3:
            break

        # Dim background cells for the whole grid.
        for row in range(cells_per_side):
            y0 = top + row * pitch
            for col in range(cells_per_side):
                x0 = left + col * pitch
                canvas[y0 : y0 + side, x0 : x0 + side] = (35, 40, 55)

        # Kernel taps sit at multiples of the dilation rate.
        tap_offsets = range(0, kernel_taps * rate, rate)
        for tap_row in tap_offsets:
            y0 = top + tap_row * pitch
            for tap_col in tap_offsets:
                x0 = left + tap_col * pitch
                canvas[y0 : y0 + side, x0 : x0 + side] = (70, 130, 200)
                # Lighter 2px strips along the top and bottom of the tap.
                canvas[y0 : y0 + 2, x0 : x0 + side] = (120, 180, 255)
                canvas[y0 + side - 2 : y0 + side, x0 : x0 + side] = (120, 180, 255)

    return canvas


def _make_aspp_frame(t: float) -> np.ndarray:
    """Draw one frame of the ASPP block diagram.

    The diagram consists of an input box, up to five parallel branch boxes
    that appear one after another, a tall concat bar (shown once progress
    exceeds 0.6) and a final conv box (shown once progress exceeds 0.8),
    joined by short horizontal connector lines.

    Args:
        t: Clip time handed to the frame callback.  Animation progress is
            ``t / (STEP_DUR * 0.7)`` clamped to 1.0.

    Returns:
        A ``(H, W, 3)`` ``uint8`` image.
    """
    canvas = np.zeros((H, W, 3), dtype=np.uint8)
    canvas[:] = BG_COLOR
    reveal = min(t / (STEP_DUR * 0.7), 1.0)

    connector = (150, 150, 170)

    # Input box with lighter top and bottom edges.
    canvas[250:330, 50:130] = (70, 130, 200)
    canvas[250:252, 50:130] = (120, 180, 255)
    canvas[328:330, 50:130] = (120, 180, 255)

    # Every branch box shares the same x position and size; only the top
    # edge and fill colour differ.
    box_x, box_w, box_h = 200, 100, 40
    branch_specs = (
        (170, (80, 200, 120)),
        (250, (200, 160, 80)),
        (330, (200, 120, 60)),
        (410, (180, 100, 80)),
        (490, (160, 80, 160)),
    )
    visible = min(int(reveal * 5) + 1, 5)
    show_concat = reveal > 0.6
    show_final_conv = reveal > 0.8

    for box_y, fill in branch_specs[:visible]:
        edge = tuple(min(channel + 50, 255) for channel in fill)
        canvas[box_y : box_y + box_h, box_x : box_x + box_w] = fill
        canvas[box_y : box_y + 2, box_x : box_x + box_w] = edge

        # 3px-thick line at the box's vertical centre, left of the box.
        mid = box_y + box_h // 2
        canvas[mid - 1 : mid + 2, 133:197] = connector
        if show_concat:
            # Same line continued from the box's right side towards the bar.
            canvas[mid - 1 : mid + 2, box_x + box_w + 3 : 378] = connector

    if show_concat:
        canvas[250:530, 380:420] = (50, 60, 80)
        canvas[250:252, 380:420] = (200, 200, 100)
        canvas[528:530, 380:420] = (200, 200, 100)

    if show_final_conv:
        canvas[350:420, 450:550] = (100, 200, 100)
        canvas[350:352, 450:550] = (150, 230, 150)
        canvas[418:420, 450:550] = (150, 230, 150)
        canvas[388:391, 423:448] = connector

    return canvas


def _deeplab_demo() -> list[CompositeVideoClip]:
    """Build the two DeepLab slides: dilated convolution, then ASPP.

    Each slide pairs an animated background clip (rendered by
    ``_make_dilated_frame`` / ``_make_aspp_frame``) with a list of text
    labels and is assembled by ``_compose_slide``.  Every label is a tuple
    ``(text, font size, colour, font, (x, y))``.

    Returns:
        A two-element list holding the dilated-convolution slide followed by
        the ASPP slide, as returned by ``_compose_slide``.
    """
    slide_len = STEP_DUR + 1

    # ── Slide 1: regular vs dilated convolution ──
    dilated_bg = VideoClip(_make_dilated_frame, duration=slide_len).with_fps(FPS)

    # One row per grid: (x, colour, heading, receptive field, footnote).
    grid_columns = [
        (60, "#64B5F6", "rate=1 (zwykła)", "RF = 3x3", "9 wag, kontekst 3px"),
        (420, "#FFE082", "rate=2 (dilated)", "RF = 5x5", "9 wag, kontekst 5px!"),
        (820, "#A5D6A7", "rate=3 (dilated)", "RF = 7x7", "9 wag, kontekst 7px!"),
    ]
    dilated_labels = [
        ("DeepLab: Atrous (Dilated) Convolution", 26, "#FFE082", FONT_B, (80, 20)),
        (
            "KROK 1: Zrozum dilated convolution — filtr z DZIURAMI",
            18,
            "#A5D6A7",
            FONT_R,
            (80, 60),
        ),
    ]
    for col_x, tint, heading, field, footnote in grid_columns:
        dilated_labels.append((heading, 14, tint, FONT_B, (col_x, 160)))
        dilated_labels.append((field, 14, tint, FONT_R, (col_x, 440)))
        dilated_labels.append((footnote, 12, "#78909C", FONT_R, (col_x, 470)))
    dilated_labels.extend(
        [
            (
                "Niebieski = pozycja wag filtra 3x3 | Szary = pominięte (dziury)",
                15,
                "#B0BEC5",
                FONT_R,
                (80, 510),
            ),
            (
                "TE SAME 9 wag → WIĘKSZE pole widzenia "
                "→ lepszy kontekst BEZ dodatkowych parametrów!",
                16,
                "white",
                FONT_R,
                (80, 550),
            ),
            (
                "Mnemonik: DZIURY w filtrze — à trous = z dziurami (fr.)",
                16,
                "#FFE082",
                FONT_R,
                (80, 600),
            ),
        ]
    )
    dilated_slide = _compose_slide(dilated_bg, dilated_labels, slide_len)

    # ── Slide 2: ASPP module ──
    aspp_bg = VideoClip(_make_aspp_frame, duration=slide_len).with_fps(FPS)

    # Captions placed on the five branch boxes.
    box_captions = [
        ("Conv 1x1", (210, 178)),
        ("Dilated r=6", (205, 258)),
        ("Dilated r=12", (203, 338)),
        ("Dilated r=18", (203, 418)),
        ("GAP (global)", (205, 498)),
    ]
    # Explanatory bullet list on the right, one line every 35px from y=210.
    bullets = [
        ("  1x1: kontekst punktowy (piksel)", "#A5D6A7"),
        ("  r=6: kontekst lokalny (~13px)", "#FFE082"),
        ("  r=12: kontekst średni (~25px)", "#FFE082"),
        ("  r=18: kontekst szeroki (~37px)", "#FFE082"),
        ("  GAP: kontekst GLOBALNY (cały obraz)", "#CE93D8"),
    ]
    aspp_labels = [
        (
            "DeepLab: ASPP (Atrous Spatial Pyramid Pooling)",
            24,
            "#FFE082",
            FONT_B,
            (80, 20),
        ),
        (
            "KROK 2: Multi-scale — analizuj obraz na WIELU skalach naraz",
            17,
            "#A5D6A7",
            FONT_R,
            (80, 60),
        ),
        ("Wejście", 13, "#64B5F6", FONT_B, (55, 235)),
        *[(caption, 12, "white", FONT_R, spot) for caption, spot in box_captions],
        ("Concat", 13, "#FFE082", FONT_B, (381, 537)),
        ("Conv", 13, "#A5D6A7", FONT_B, (470, 425)),
        (
            "5 gałęzi RÓWNOLEGŁYCH → różne skale kontekstu:",
            16,
            "#B0BEC5",
            FONT_R,
            (550, 170),
        ),
        *[
            (line, 14, tint, FONT_R, (560, 210 + 35 * row))
            for row, (line, tint) in enumerate(bullets)
        ],
        ("Concat → 1x1 conv → mapa segmentacji", 16, "#A5D6A7", FONT_R, (550, 400)),
        (
            "Efekt: sieć widzi OD piksela DO całego obrazu naraz!",
            17,
            "white",
            FONT_R,
            (80, 600),
        ),
        (
            "Mnemonik: ASPP = Piramida z DZIURAMI, patrzy na 5 skal jednocześnie",
            15,
            "#FFE082",
            FONT_R,
            (80, 645),
        ),
    ]
    aspp_slide = _compose_slide(aspp_bg, aspp_labels, slide_len)

    return [dilated_slide, aspp_slide]
WIP: Enforce 500-line limit - split batch 1 Split 16+ files. 27 files still need splitting. See session notes. 2026-03-16 22:46:48 +01:00			`"""DeepLab architecture animations for Q23 segmentation video."""`

			`from __future__ import annotations`

			`from moviepy import (`
			`CompositeVideoClip,`
			`VideoClip,`
			`)`
			`import numpy as np`

			`from python_pkg.praca_magisterska_video._q23_helpers import (`
			`BG_COLOR,`
			`FONT_B,`
			`FONT_R,`
			`FPS,`
			`STEP_DUR,`
			`H,`
			`W,`
			`_compose_slide,`
			`)`


			`# ── DeepLab Architecture ─────────────────────────────────────────`
			`def _make_dilated_frame(t: float) -> np.ndarray:`
			`"""Render a dilated convolution comparison frame."""`
			`frame = np.zeros((H, W, 3), dtype=np.uint8)`
			`frame[:] = BG_COLOR`
			`progress = min(t / (STEP_DUR * 0.7), 1.0)`

			`cell = 36`
			`grids = [`
			`(`
			`"rate=1",`
			`60,`
			`[`
			`(0, 0),`
			`(0, 1),`
			`(0, 2),`
			`(1, 0),`
			`(1, 1),`
			`(1, 2),`
			`(2, 0),`
			`(2, 1),`
			`(2, 2),`
			`],`
			`),`
			`(`
			`"rate=2",`
			`420,`
			`[`
			`(0, 0),`
			`(0, 2),`
			`(0, 4),`
			`(2, 0),`
			`(2, 2),`
			`(2, 4),`
			`(4, 0),`
			`(4, 2),`
			`(4, 4),`
			`],`
			`),`
			`(`
			`"rate=3",`
			`820,`
			`[`
			`(0, 0),`
			`(0, 3),`
			`(0, 6),`
			`(3, 0),`
			`(3, 3),`
			`(3, 6),`
			`(6, 0),`
			`(6, 3),`
			`(6, 6),`
			`],`
			`),`
			`]`

			`for gi, (_label, gx, positions) in enumerate(grids):`
			`if progress < gi * 0.3:`
			`break`
			`gy = 180`
			`grid_size = 7`
			`for r in range(grid_size):`
			`for c in range(grid_size):`
			`x = gx + c * cell`
			`y = gy + r * cell`
			`frame[y : y + cell - 2, x : x + cell - 2] = (35, 40, 55)`
			`for r, c in positions:`
			`x = gx + c * cell`
			`y = gy + r * cell`
			`frame[y : y + cell - 2, x : x + cell - 2] = (70, 130, 200)`
			`frame[y : y + 2, x : x + cell - 2] = (120, 180, 255)`
			`frame[y + cell - 4 : y + cell - 2, x : x + cell - 2] = (120, 180, 255)`

			`return frame`


			`def _make_aspp_frame(t: float) -> np.ndarray:`
			`"""Render a single ASPP module animation frame."""`
			`frame = np.zeros((H, W, 3), dtype=np.uint8)`
			`frame[:] = BG_COLOR`
			`progress = min(t / (STEP_DUR * 0.7), 1.0)`

			`frame[250:330, 50:130] = (70, 130, 200)`
			`frame[250:252, 50:130] = (120, 180, 255)`
			`frame[328:330, 50:130] = (120, 180, 255)`

			`branches = [`
			`("1x1 conv", 250, (200, 170), (100, 40), (80, 200, 120)),`
			`("rate=6", 310, (200, 250), (100, 40), (200, 160, 80)),`
			`("rate=12", 370, (200, 330), (100, 40), (200, 120, 60)),`
			`("rate=18", 430, (200, 410), (100, 40), (180, 100, 80)),`
			`("GAP", 490, (200, 490), (100, 40), (160, 80, 160)),`
			`]`
			`n_branches = min(int(progress * 5) + 1, 5)`
			`for i, (_lbl, _h, (bx, by), (bw, bh), color) in enumerate(branches):`
			`if i < n_branches:`
			`frame[by : by + bh, bx : bx + bw] = color`
			`frame[by : by + 2, bx : bx + bw] = tuple(min(c + 50, 255) for c in color)`
			`ay = by + bh // 2`
			`frame[ay - 1 : ay + 2, 133:197] = (150, 150, 170)`

			`concat_phase = 0.6`
			`if progress > concat_phase:`
			`frame[250:530, 380:420] = (50, 60, 80)`
			`frame[250:252, 380:420] = (200, 200, 100)`
			`frame[528:530, 380:420] = (200, 200, 100)`
			`for i, (_lbl, _h, (bx, by), (bw, bh), _c) in enumerate(branches):`
			`if i < n_branches:`
			`ay = by + bh // 2`
			`frame[ay - 1 : ay + 2, bx + bw + 3 : 378] = (150, 150, 170)`

			`final_conv_phase = 0.8`
			`if progress > final_conv_phase:`
			`frame[350:420, 450:550] = (100, 200, 100)`
			`frame[350:352, 450:550] = (150, 230, 150)`
			`frame[418:420, 450:550] = (150, 230, 150)`
			`frame[388:391, 423:448] = (150, 150, 170)`

			`return frame`


			`def _deeplab_demo() -> list[CompositeVideoClip]:`
			`"""Animate DeepLab: dilated convolution + ASPP step by step."""`
			`dur = STEP_DUR + 1`

			`# Slide 1: Regular vs Dilated convolution`
			`dil_clip = VideoClip(_make_dilated_frame, duration=dur).with_fps(FPS)`
			`labels = [`
			`("DeepLab: Atrous (Dilated) Convolution", 26, "#FFE082", FONT_B, (80, 20)),`
			`(`
			`"KROK 1: Zrozum dilated convolution — filtr z DZIURAMI",`
			`18,`
			`"#A5D6A7",`
			`FONT_R,`
			`(80, 60),`
			`),`
			`("rate=1 (zwykła)", 14, "#64B5F6", FONT_B, (60, 160)),`
			`("RF = 3x3", 14, "#64B5F6", FONT_R, (60, 440)),`
			`("9 wag, kontekst 3px", 12, "#78909C", FONT_R, (60, 470)),`
			`("rate=2 (dilated)", 14, "#FFE082", FONT_B, (420, 160)),`
			`("RF = 5x5", 14, "#FFE082", FONT_R, (420, 440)),`
			`("9 wag, kontekst 5px!", 12, "#78909C", FONT_R, (420, 470)),`
			`("rate=3 (dilated)", 14, "#A5D6A7", FONT_B, (820, 160)),`
			`("RF = 7x7", 14, "#A5D6A7", FONT_R, (820, 440)),`
			`("9 wag, kontekst 7px!", 12, "#78909C", FONT_R, (820, 470)),`
			`(`
			`"Niebieski = pozycja wag filtra 3x3 \| Szary = pominięte (dziury)",`
			`15,`
			`"#B0BEC5",`
			`FONT_R,`
			`(80, 510),`
			`),`
			`(`
			`"TE SAME 9 wag → WIĘKSZE pole widzenia "`
			`"→ lepszy kontekst BEZ dodatkowych parametrów!",`
			`16,`
			`"white",`
			`FONT_R,`
			`(80, 550),`
			`),`
			`(`
			`"Mnemonik: DZIURY w filtrze — à trous = z dziurami (fr.)",`
			`16,`
			`"#FFE082",`
			`FONT_R,`
			`(80, 600),`
			`),`
			`]`
			`slides = [_compose_slide(dil_clip, labels, dur)]`

			`# Slide 2: ASPP module step by step`
			`aspp_clip = VideoClip(_make_aspp_frame, duration=dur).with_fps(FPS)`
			`labels2 = [`
			`(`
			`"DeepLab: ASPP (Atrous Spatial Pyramid Pooling)",`
			`24,`
			`"#FFE082",`
			`FONT_B,`
			`(80, 20),`
			`),`
			`(`
			`"KROK 2: Multi-scale — analizuj obraz na WIELU skalach naraz",`
			`17,`
			`"#A5D6A7",`
			`FONT_R,`
			`(80, 60),`
			`),`
			`("Wejście", 13, "#64B5F6", FONT_B, (55, 235)),`
			`("Conv 1x1", 12, "white", FONT_R, (210, 178)),`
			`("Dilated r=6", 12, "white", FONT_R, (205, 258)),`
			`("Dilated r=12", 12, "white", FONT_R, (203, 338)),`
			`("Dilated r=18", 12, "white", FONT_R, (203, 418)),`
			`("GAP (global)", 12, "white", FONT_R, (205, 498)),`
			`("Concat", 13, "#FFE082", FONT_B, (381, 537)),`
			`("Conv", 13, "#A5D6A7", FONT_B, (470, 425)),`
			`(`
			`"5 gałęzi RÓWNOLEGŁYCH → różne skale kontekstu:",`
			`16,`
			`"#B0BEC5",`
			`FONT_R,`
			`(550, 170),`
			`),`
			`(" 1x1: kontekst punktowy (piksel)", 14, "#A5D6A7", FONT_R, (560, 210)),`
			`(" r=6: kontekst lokalny (~13px)", 14, "#FFE082", FONT_R, (560, 245)),`
			`(" r=12: kontekst średni (~25px)", 14, "#FFE082", FONT_R, (560, 280)),`
			`(" r=18: kontekst szeroki (~37px)", 14, "#FFE082", FONT_R, (560, 315)),`
			`(" GAP: kontekst GLOBALNY (cały obraz)", 14, "#CE93D8", FONT_R, (560, 350)),`
			`("Concat → 1x1 conv → mapa segmentacji", 16, "#A5D6A7", FONT_R, (550, 400)),`
			`(`
			`"Efekt: sieć widzi OD piksela DO całego obrazu naraz!",`
			`17,`
			`"white",`
			`FONT_R,`
			`(80, 600),`
			`),`
			`(`
			`"Mnemonik: ASPP = Piramida z DZIURAMI, patrzy na 5 skal jednocześnie",`
			`15,`
			`"#FFE082",`
			`FONT_R,`
			`(80, 645),`
			`),`
			`]`
			`slides.append(_compose_slide(aspp_clip, labels2, dur))`

			`return slides`