diff --git a/python_pkg/praca_magisterska_video/visualize_q02.py b/python_pkg/praca_magisterska_video/visualize_q02.py
index fd5e859..717cac0 100644
--- a/python_pkg/praca_magisterska_video/visualize_q02.py
+++ b/python_pkg/praca_magisterska_video/visualize_q02.py
@@ -6,6 +6,8 @@ on a small example graph, rendering each algorithm step by step.
 
 from __future__ import annotations
 
+from dataclasses import dataclass
+import logging
 import os
 from pathlib import Path
 
@@ -33,6 +35,9 @@ OUTPUT_DIR = Path(__file__).resolve().parent / "videos"
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 OUTPUT = str(OUTPUT_DIR / "q02_shortest_path.mp4")
 
+logging.basicConfig(level=logging.INFO)
+_logger = logging.getLogger(__name__)
+
 # Graph definition
 NODE_POS = {"S": (250, 280), "A": (550, 180), "B": (550, 450), "C": (850, 320)}
 EDGES_DIJKSTRA = [
@@ -101,13 +106,13 @@ def _draw_circle(
 
 def _draw_line(
     frame: np.ndarray,
-    x1: int,
-    y1: int,
-    x2: int,
-    y2: int,
+    start: tuple[int, int],
+    end: tuple[int, int],
     color: tuple[int, ...],
     thickness: int = 2,
 ) -> None:
+    x1, y1 = start
+    x2, y2 = end
     length = max(int(np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)), 1)
     for i in range(length):
         frac = i / length
@@ -122,13 +127,13 @@ def _draw_line(
 
 def _draw_arrow(
     frame: np.ndarray,
-    x1: int,
-    y1: int,
-    x2: int,
-    y2: int,
+    start: tuple[int, int],
+    end: tuple[int, int],
     color: tuple[int, ...],
     thickness: int = 2,
 ) -> None:
+    x1, y1 = start
+    x2, y2 = end
     r = 32
     length = max(np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2), 1)
     ddx = (x2 - x1) / length
@@ -137,14 +142,14 @@ def _draw_arrow(
     sy = int(y1 + ddy * r)
     ex = int(x2 - ddx * r)
     ey = int(y2 - ddy * r)
-    _draw_line(frame, sx, sy, ex, ey, color, thickness)
+    _draw_line(frame, (sx, sy), (ex, ey), color, thickness)
     angle = np.arctan2(ey - sy, ex - sx)
     arrow_len = 12
     for side in [-1, 1]:
         a = angle + np.pi + side * 0.4
         ax = int(ex + arrow_len * np.cos(a))
         ay = int(ey + arrow_len * np.sin(a))
-        _draw_line(frame, ex, ey, ax, ay, color, thickness)
+        _draw_line(frame, (ex, ey), (ax, ay), color, thickness)
 
 
 def _render_graph(
@@ -163,7 +168,7 @@ def _render_graph(
         sx, sy = nodes[src]
         dx, dy = nodes[dst]
         ec = COL_EDGE_ACT if active_edge == (src, dst) else COL_EDGE
-        _draw_arrow(frame, sx, sy, dx, dy, ec, thickness=2)
+        _draw_arrow(frame, (sx, sy), (dx, dy), ec, thickness=2)
 
     for name, (x, y) in nodes.items():
         if name == current:
@@ -184,19 +189,32 @@ def _render_graph(
     return frame
 
 
+@dataclass
+class _StepConfig:
+    """Arguments for one algorithm-visualization step, consumed by ``_make_step``."""
+
+    nodes: dict[str, tuple[int, int]]  # node name -> (x, y) position
+    edges: list[tuple[str, str, int]]  # (src, dst, weight) presumably; confirm
+    distances: dict[str, str]  # per-node label text, e.g. "0", "2" or INF
+    current: str | None = None  # node drawn as the current one, if any
+    visited: set[str] | None = None  # None is treated as an empty set by _make_step
+    active_edge: tuple[str, str] | None = None  # (src, dst) edge to highlight
+    step_text: str = ""
+    algo_name: str = ""
+
+
 def _make_step(
-    nodes: dict[str, tuple[int, int]],
-    edges: list[tuple[str, str, int]],
-    distances: dict[str, str],
-    current: str | None = None,
-    visited: set[str] | None = None,
-    active_edge: tuple[str, str] | None = None,
-    step_text: str = "",
-    algo_name: str = "",
+    cfg: _StepConfig,
     duration: float = STEP_DUR,
 ) -> CompositeVideoClip:
-    if visited is None:
-        visited = set()
+    nodes = cfg.nodes
+    edges = cfg.edges
+    distances = cfg.distances
+    current = cfg.current
+    visited = cfg.visited if cfg.visited is not None else set()
+    active_edge = cfg.active_edge
+    step_text = cfg.step_text
+    algo_name = cfg.algo_name
 
     graph_frame = _render_graph(nodes, edges, distances, current, visited, active_edge)
 
@@ -305,50 +323,66 @@ def _dijkstra_steps() -> list[CompositeVideoClip]:
     e = EDGES_DIJKSTRA
     return [
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": INF, "B": INF, "C": INF},
-            current="S",
-            step_text="Inicjalizacja: d[S]=0, reszta=∞. Wybierz S (min d).",
-            algo_name="Algorytm Dijkstry",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": INF, "B": INF, "C": INF},
+                current="S",
+                step_text="Inicjalizacja: d[S]=0, reszta=∞. Wybierz S (min d).",
+                algo_name="Algorytm Dijkstry",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "2", "B": "5", "C": INF},
-            current="S",
-            active_edge=("S", "A"),
-            step_text="Relaksacja S→A: d[A]=0+2=2.  S→B: d[B]=0+5=5.",
-            algo_name="Algorytm Dijkstry",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "2", "B": "5", "C": INF},
+                current="S",
+                active_edge=("S", "A"),
+                step_text="Relaksacja S→A: d[A]=0+2=2.  S→B: d[B]=0+5=5.",
+                algo_name="Algorytm Dijkstry",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "2", "B": "5", "C": "5"},
-            current="A",
-            visited={"S"},
-            active_edge=("A", "C"),
-            step_text="Zamknij S. Min=A(2). Relaksacja A→C: d[C]=2+3=5.",
-            algo_name="Algorytm Dijkstry",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "2", "B": "5", "C": "5"},
+                current="A",
+                visited={"S"},
+                active_edge=("A", "C"),
+                step_text="Zamknij S. Min=A(2). Relaksacja A→C: d[C]=2+3=5.",
+                algo_name="Algorytm Dijkstry",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "2", "B": "5", "C": "5"},
-            current="B",
-            visited={"S", "A"},
-            active_edge=("B", "A"),
-            step_text="Zamknij A. Min=B(5). B→A: 5+1=6>2, nie zmieniaj. B→C: 5+6=11>5.",
-            algo_name="Algorytm Dijkstry",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "2", "B": "5", "C": "5"},
+                current="B",
+                visited={"S", "A"},
+                active_edge=("B", "A"),
+                step_text=(
+                    "Zamknij A. Min=B(5). B→A: 5+1=6>2, "
+                    "nie zmieniaj. B→C: 5+6=11>5."
+                ),
+                algo_name="Algorytm Dijkstry",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "2", "B": "5", "C": "5"},
-            current="C",
-            visited={"S", "A", "B"},
-            step_text="Zamknij B. Min=C(5). Koniec!  Wynik: d={S:0, A:2, B:5, C:5}.",
-            algo_name="Dijkstra -- WYNIK",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "2", "B": "5", "C": "5"},
+                current="C",
+                visited={"S", "A", "B"},
+                step_text=(
+                    "Zamknij B. Min=C(5). Koniec!  "
+                    "Wynik: d={S:0, A:2, B:5, C:5}."
+                ),
+                algo_name="Dijkstra -- WYNIK",
+            ),
         ),
     ]
 
@@ -358,42 +392,67 @@ def _bellman_ford_steps() -> list[CompositeVideoClip]:
     e = EDGES_BF
     return [
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": INF, "B": INF, "C": INF},
-            step_text="Bellman-Ford: relaksuj WSZYSTKIE krawędzie V-1=3 razy. Ujemne wagi OK!",
-            algo_name="Algorytm Bellmana-Forda",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": INF, "B": INF, "C": INF},
+                step_text=(
+                    "Bellman-Ford: relaksuj WSZYSTKIE "
+                    "krawędzie V-1=3 razy. Ujemne wagi OK!"
+                ),
+                algo_name="Algorytm Bellmana-Forda",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "2", "B": "5", "C": "5"},
-            active_edge=("S", "A"),
-            step_text="Iteracja 1: S→A:2, A→C:5, S→B:5. Potem B→A: 5+(-4)=1 < 2 → A=1!",
-            algo_name="Bellman-Ford -- iteracja 1",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "2", "B": "5", "C": "5"},
+                active_edge=("S", "A"),
+                step_text=(
+                    "Iteracja 1: S→A:2, A→C:5, S→B:5. "
+                    "Potem B→A: 5+(-4)=1 < 2 → A=1!"
+                ),
+                algo_name="Bellman-Ford -- iteracja 1",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "1", "B": "5", "C": "5"},
-            active_edge=("B", "A"),
-            step_text="B→A z ujemną wagą -4: d[A] poprawione z 2 na 1! (Dijkstra by to pominął!)",
-            algo_name="Bellman-Ford -- ujemna waga",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "1", "B": "5", "C": "5"},
+                active_edge=("B", "A"),
+                step_text=(
+                    "B→A z ujemną wagą -4: d[A] poprawione "
+                    "z 2 na 1! (Dijkstra by to pominął!)"
+                ),
+                algo_name="Bellman-Ford -- ujemna waga",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "1", "B": "5", "C": "4"},
-            active_edge=("A", "C"),
-            step_text="Iteracja 2: A→C: 1+3=4 < 5 → C=4. Propagacja poprawionego A.",
-            algo_name="Bellman-Ford -- iteracja 2",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "1", "B": "5", "C": "4"},
+                active_edge=("A", "C"),
+                step_text=(
+                    "Iteracja 2: A→C: 1+3=4 < 5 → C=4. "
+                    "Propagacja poprawionego A."
+                ),
+                algo_name="Bellman-Ford -- iteracja 2",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "1", "B": "5", "C": "4"},
-            step_text="Iteracja 3: brak zmian. V-ta iteracja: brak popraw → brak cyklu ujemnego.",
-            algo_name="Bellman-Ford -- WYNIK, O(V*E)",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "1", "B": "5", "C": "4"},
+                step_text=(
+                    "Iteracja 3: brak zmian. V-ta iteracja: "
+                    "brak popraw → brak cyklu ujemnego."
+                ),
+                algo_name="Bellman-Ford -- WYNIK, O(V*E)",
+            ),
         ),
     ]
 
@@ -403,40 +462,60 @@ def _astar_steps() -> list[CompositeVideoClip]:
     e = EDGES_DIJKSTRA
     return [
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": INF, "B": INF, "C": INF},
-            current="S",
-            step_text="A*: f(n)=g(n)+h(n). Cel=C. h(S)=5, h(A)=3, h(B)=4, h(C)=0. f(S)=0+5=5.",
-            algo_name="Algorytm A*",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": INF, "B": INF, "C": INF},
+                current="S",
+                step_text=(
+                    "A*: f(n)=g(n)+h(n). Cel=C. "
+                    "h(S)=5, h(A)=3, h(B)=4, h(C)=0. f(S)=0+5=5."
+                ),
+                algo_name="Algorytm A*",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "2", "B": "5", "C": INF},
-            current="S",
-            active_edge=("S", "A"),
-            step_text="Relaksuj S: A(g=2,f=2+3=5), B(g=5,f=5+4=9). Min f → A(5).",
-            algo_name="A* -- rozwijanie S",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "2", "B": "5", "C": INF},
+                current="S",
+                active_edge=("S", "A"),
+                step_text=(
+                    "Relaksuj S: A(g=2,f=2+3=5), "
+                    "B(g=5,f=5+4=9). Min f → A(5)."
+                ),
+                algo_name="A* -- rozwijanie S",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "2", "B": "5", "C": "5"},
-            current="A",
-            visited={"S"},
-            active_edge=("A", "C"),
-            step_text="Rozwiń A(f=5): A→C: g=2+3=5, f=5+0=5. Min f → C(5) = CEL!",
-            algo_name="A* -- rozwijanie A",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "2", "B": "5", "C": "5"},
+                current="A",
+                visited={"S"},
+                active_edge=("A", "C"),
+                step_text=(
+                    "Rozwiń A(f=5): A→C: g=2+3=5, "
+                    "f=5+0=5. Min f → C(5) = CEL!"
+                ),
+                algo_name="A* -- rozwijanie A",
+            ),
         ),
         _make_step(
-            n,
-            e,
-            {"S": "0", "A": "2", "B": "5", "C": "5"},
-            current="C",
-            visited={"S", "A"},
-            step_text="Dotarliśmy do C! Koszt=5. A* NIE przetwarza B (3 vs 4 w Dijkstrze).",
-            algo_name="A* -- cel osiągnięty!",
+            _StepConfig(
+                n,
+                e,
+                {"S": "0", "A": "2", "B": "5", "C": "5"},
+                current="C",
+                visited={"S", "A"},
+                step_text=(
+                    "Dotarliśmy do C! Koszt=5. "
+                    "A* NIE przetwarza B (3 vs 4 w Dijkstrze)."
+                ),
+                algo_name="A* -- cel osiągnięty!",
+            ),
         ),
     ]
 
@@ -523,7 +602,7 @@ def main() -> None:
     final.write_videofile(
         OUTPUT, fps=FPS, codec="libx264", audio=False, preset="medium", threads=4
     )
-    print(f"Video saved to: {OUTPUT}")
+    _logger.info("Video saved to: %s", OUTPUT)
 
 
 if __name__ == "__main__":
diff --git a/python_pkg/praca_magisterska_video/visualize_q23.py b/python_pkg/praca_magisterska_video/visualize_q23.py
index 9981934..91894e8 100644
--- a/python_pkg/praca_magisterska_video/visualize_q23.py
+++ b/python_pkg/praca_magisterska_video/visualize_q23.py
@@ -10,6 +10,7 @@ Creates animated video demonstrating:
 
 from __future__ import annotations
 
+import logging
 import os
 from pathlib import Path
 
@@ -37,6 +38,9 @@ OUTPUT_DIR = Path(__file__).resolve().parent / "videos"
 OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 OUTPUT = str(OUTPUT_DIR / "q23_segmentation.mp4")
 
+logging.basicConfig(level=logging.INFO)
+_logger = logging.getLogger(__name__)
+
 BG_COLOR = (15, 20, 35)
 rng = np.random.default_rng(42)
 
@@ -102,6 +106,25 @@ def _text_slide(
     )
 
 
+def _compose_slide(
+    base_clip: VideoClip,
+    labels: list[tuple[str, int, str, str, tuple[int, int]]],
+    duration: float,
+) -> CompositeVideoClip:
+    """Composite ``base_clip`` with one text clip per label and add fade in/out."""
+    # Each label is (text, font size, colour, font, position).
+    overlays = [
+        _tc(text=text, font_size=fs, color=color, font=font)
+        .with_duration(duration)
+        .with_position(pos)
+        for text, fs, color, font, pos in labels
+    ]
+    # The base clip stays first in the list; the labels follow in order.
+    layers: list[VideoClip] = [base_clip, *overlays]
+    composite = CompositeVideoClip(layers, size=(W, H))
+    return composite.with_effects([FadeIn(0.3), FadeOut(0.3)])
+
+
 # ── Segmentation concept ─────────────────────────────────────────
 def _segmentation_concept() -> list[CompositeVideoClip]:
     """Show what segmentation is: pixel-level labeling."""
@@ -164,7 +187,8 @@ def _segmentation_concept() -> list[CompositeVideoClip]:
         ("niebo  |  drzewo  |  droga  |  samochód", 18, "#90CAF9", FONT_R, (600, 420)),
         ("Segmentacja = klasyfikacja per-piksel", 24, "#FFE082", FONT_B, (100, 500)),
         (
-            "Semantic: klasy bez instancji | Instance: rozróżnia obiekty | Panoptic: oba",
+            "Semantic: klasy bez instancji | Instance: "
+            "rozróżnia obiekty | Panoptic: oba",
             16,
             "#78909C",
             FONT_R,
@@ -459,7 +483,8 @@ def _watershed_demo() -> list[CompositeVideoClip]:
 
         # Dam marker at ridge
         ridge_x = ox + int(0.5 * terrain_w)
-        if water_level > 160:
+        dam_visible_threshold = 160
+        if water_level > dam_visible_threshold:
             frame[oy - water_level : oy - 140, ridge_x - 2 : ridge_x + 2] = (
                 255,
                 80,
@@ -495,7 +520,9 @@ def _watershed_demo() -> list[CompositeVideoClip]:
             (100, 160),
         ),
         (
-            "Problem: over-segmentation (za dużo regionów). Rozwiązanie: marker-controlled.",
+            "Problem: over-segmentation "
+            "(za dużo regionów). "
+            "Rozwiązanie: marker-controlled.",
             16,
             "#A5D6A7",
             FONT_R,
@@ -526,84 +553,84 @@ def _watershed_demo() -> list[CompositeVideoClip]:
 
 
 # ── U-Net Architecture ───────────────────────────────────────────
+def _draw_unet_skips(
+    frame: np.ndarray,
+    enc_positions: list[tuple[int, int, int, int]],
+    n_blocks: int,
+    dec_x: int,
+    skip_threshold: int,
+) -> None:
+    """Draw dashed skip lines from encoder blocks towards ``dec_x`` (mutates frame)."""
+    if n_blocks <= skip_threshold:  # nothing to draw until past the threshold
+        return
+    for i in range(min(n_blocks - skip_threshold, len(enc_positions))):
+        ey = enc_positions[i][1] + enc_positions[i][3] // 2  # vertical mid of block
+        ex_end = enc_positions[i][0] + enc_positions[i][2]  # right edge of block
+        for dash_x in range(ex_end + 10, dec_x - 10, 15):  # 8 px dash every 15 px
+            frame[ey : ey + 2, dash_x : dash_x + 8] = (255, 200, 50)
+
+
+def _make_unet_frame(t: float) -> np.ndarray:
+    """Render the U-Net diagram at time ``t``; blocks appear one by one as t grows."""
+    frame = np.zeros((H, W, 3), dtype=np.uint8)
+    frame[:] = BG_COLOR
+
+    enc_sizes = [(80, 120), (60, 100), (45, 80), (30, 60)]  # (width, height)
+    dec_sizes = list(reversed(enc_sizes))
+    enc_x = 150
+    dec_x = 850
+
+    progress = min(t / (STEP_DUR * 0.6), 1.0)
+    n_blocks = int(progress * 8) + 1  # 1..9 for t >= 0 (9 only at progress == 1.0)
+
+    enc_positions: list[tuple[int, int, int, int]] = []
+    y_offset = 120
+    for i, (bw, bh) in enumerate(enc_sizes):
+        x = enc_x
+        y = y_offset + i * 130
+        enc_positions.append((x, y, bw, bh))
+        if i < n_blocks:
+            frame[y : y + bh, x : x + bw] = (70, 130, 200)  # encoder block fill
+            frame[y : y + 2, x : x + bw] = (100, 180, 255)  # 2 px border, 4 edges
+            frame[y + bh - 2 : y + bh, x : x + bw] = (100, 180, 255)
+            frame[y : y + bh, x : x + 2] = (100, 180, 255)
+            frame[y : y + bh, x + bw - 2 : x + bw] = (100, 180, 255)
+            if i < len(enc_sizes) - 1:  # down arrow to the next encoder level
+                ax = x + bw // 2
+                ay = y + bh + 10
+                frame[ay : ay + 20, ax - 1 : ax + 2] = (150, 150, 170)
+
+    bx, by = 500, y_offset + 3 * 130 + 30
+    encoder_count = len(enc_sizes)
+    if n_blocks > encoder_count:  # bottleneck appears once all encoders are shown
+        frame[by : by + 50, bx : bx + 25] = (200, 100, 80)
+        frame[by : by + 2, bx : bx + 25] = (255, 140, 100)
+        frame[by + 48 : by + 50, bx : bx + 25] = (255, 140, 100)
+
+    for i, (bw, bh) in enumerate(dec_sizes):
+        x = dec_x
+        y = y_offset + (3 - i) * 130  # decoder levels are drawn bottom-up
+        if n_blocks > encoder_count + i + 1:  # +1 accounts for the bottleneck
+            frame[y : y + bh, x : x + bw] = (80, 200, 120)  # decoder block fill
+            frame[y : y + 2, x : x + bw] = (120, 230, 150)
+            frame[y + bh - 2 : y + bh, x : x + bw] = (120, 230, 150)
+            frame[y : y + bh, x : x + 2] = (120, 230, 150)
+            frame[y : y + bh, x + bw - 2 : x + bw] = (120, 230, 150)
+            if i < len(dec_sizes) - 1:  # up arrow to the next decoder level
+                ax = x + bw // 2
+                ay = y - 30
+                frame[ay : ay + 20, ax - 1 : ax + 2] = (150, 150, 170)
+
+    skip_threshold = 5
+    _draw_unet_skips(frame, enc_positions, n_blocks, dec_x, skip_threshold)
+
+    return frame
+
+
 def _unet_demo() -> list[CompositeVideoClip]:
     """Animate U-Net encoder-decoder architecture."""
-    slides = []
-
-    def make_unet_frame(t: float) -> np.ndarray:
-        frame = np.zeros((H, W, 3), dtype=np.uint8)
-        frame[:] = BG_COLOR
-
-        # Draw U-shape: encoder blocks going down, decoder going up
-        # Encoder: 4 blocks getting smaller
-        enc_sizes = [(80, 120), (60, 100), (45, 80), (30, 60)]
-        dec_sizes = list(reversed(enc_sizes))
-        enc_x = 150
-        dec_x = 850
-
-        progress = min(t / (STEP_DUR * 0.6), 1.0)
-        n_blocks = int(progress * 8) + 1  # 1 to 8
-
-        enc_positions = []
-        y_offset = 120
-        for i, (bw, bh) in enumerate(enc_sizes):
-            x = enc_x
-            y = y_offset + i * 130
-            enc_positions.append((x, y, bw, bh))
-            if i < n_blocks:
-                # Draw encoder block
-                frame[y : y + bh, x : x + bw] = (70, 130, 200)
-                # Border
-                frame[y : y + 2, x : x + bw] = (100, 180, 255)
-                frame[y + bh - 2 : y + bh, x : x + bw] = (100, 180, 255)
-                frame[y : y + bh, x : x + 2] = (100, 180, 255)
-                frame[y : y + bh, x + bw - 2 : x + bw] = (100, 180, 255)
-
-                # Down arrow
-                if i < len(enc_sizes) - 1:
-                    ax = x + bw // 2
-                    ay = y + bh + 10
-                    frame[ay : ay + 20, ax - 1 : ax + 2] = (150, 150, 170)
-
-        # Bottleneck
-        bx, by = 500, y_offset + 3 * 130 + 30
-        if n_blocks > 4:
-            frame[by : by + 50, bx : bx + 25] = (200, 100, 80)
-            frame[by : by + 2, bx : bx + 25] = (255, 140, 100)
-            frame[by + 48 : by + 50, bx : bx + 25] = (255, 140, 100)
-
-        # Decoder
-        dec_positions = []
-        for i, (bw, bh) in enumerate(dec_sizes):
-            x = dec_x
-            y = y_offset + (3 - i) * 130
-            dec_positions.append((x, y, bw, bh))
-            if n_blocks > 4 + i + 1:
-                frame[y : y + bh, x : x + bw] = (80, 200, 120)
-                frame[y : y + 2, x : x + bw] = (120, 230, 150)
-                frame[y + bh - 2 : y + bh, x : x + bw] = (120, 230, 150)
-                frame[y : y + bh, x : x + 2] = (120, 230, 150)
-                frame[y : y + bh, x + bw - 2 : x + bw] = (120, 230, 150)
-
-                # Up arrow
-                if i < len(dec_sizes) - 1:
-                    ax = x + bw // 2
-                    ay = y - 30
-                    frame[ay : ay + 20, ax - 1 : ax + 2] = (150, 150, 170)
-
-        # Skip connections (horizontal dashed lines)
-        if n_blocks > 5:
-            for i in range(min(n_blocks - 5, 4)):
-                ey = enc_positions[i][1] + enc_positions[i][3] // 2
-                ex_end = enc_positions[i][0] + enc_positions[i][2]
-                dx_start = dec_x
-                for dash_x in range(ex_end + 10, dx_start - 10, 15):
-                    frame[ey : ey + 2, dash_x : dash_x + 8] = (255, 200, 50)
-
-        return frame
-
-    unet_clip = VideoClip(make_unet_frame, duration=STEP_DUR + 1).with_fps(FPS)
-    text_clips: list[VideoClip] = [unet_clip]
+    dur = STEP_DUR + 1
+    unet_clip = VideoClip(_make_unet_frame, duration=dur).with_fps(FPS)
     labels = [
         ("U-Net: Encoder-Decoder + Skip Connections", 28, "#FFE082", FONT_B, (80, 20)),
         (
@@ -649,102 +676,99 @@ def _unet_demo() -> list[CompositeVideoClip]:
             (80, 670),
         ),
     ]
-    for text, fs, color, font, pos in labels:
-        tc = (
-            _tc(text=text, font_size=fs, color=color, font=font)
-            .with_duration(STEP_DUR + 1)
-            .with_position(pos)
-        )
-        text_clips.append(tc)
-
-    slides.append(
-        CompositeVideoClip(text_clips, size=(W, H)).with_effects(
-            [FadeIn(0.3), FadeOut(0.3)]
-        )
-    )
-    return slides
+    return [_compose_slide(unet_clip, labels, dur)]
 
 
 # ── FCN Architecture ─────────────────────────────────────────────
+def _draw_pipeline_blocks(
+    frame: np.ndarray,
+    blocks: list[
+        tuple[tuple[int, int], tuple[int, int], tuple[int, int, int]]
+    ],
+    n_visible: int,
+    arrow_limit: int,
+) -> None:
+    """Paint the first ``n_visible`` blocks and a short connector after each."""
+    for i, (origin, size, fill) in enumerate(blocks):
+        if i >= n_visible:
+            continue
+        (left, top), (w, h) = origin, size
+        right, bottom = left + w, top + h
+        frame[top:bottom, left:right] = fill
+        # Top and bottom edges use the fill colour brightened by 50, capped at 255.
+        edge = tuple(min(c + 50, 255) for c in fill)
+        frame[top : top + 2, left:right] = edge
+        frame[bottom - 2 : bottom, left:right] = edge
+        if i < arrow_limit:
+            mid_y = top + h // 2
+            frame[mid_y - 1 : mid_y + 2, right + 3 : right + 15] = (150, 150, 170)
+
+
+def _draw_red_cross(
+    frame: np.ndarray,
+    x_start: int,
+    width: int,
+    top_y: int,
+    height: int,
+) -> None:
+    """Paint a red X spanning ``width`` x ``height`` px from (x_start, top_y)."""
+    red = (255, 80, 80)
+    for offset in range(-2, 3):  # five vertical offsets thicken the strokes
+        for step in range(height):
+            col = x_start + int(step * width / height)
+            if not 0 <= col < W:
+                continue
+            for row in (top_y + step + offset, top_y + height - step + offset):
+                if 0 <= row < H:
+                    frame[row, col] = red
+
+
+def _make_fcn_frame(t: float) -> np.ndarray:
+    """Render one frame at time ``t``: classic CNN row on top, FCN row below."""
+    frame = np.zeros((H, W, 3), dtype=np.uint8)
+    frame[:] = BG_COLOR
+    progress = min(t / (STEP_DUR * 0.8), 1.0)  # hits 1.0 at 80% of STEP_DUR
+
+    top_y = 140
+    blocks_classic = [  # ((x, y), (width, height), colour) per block
+        ((80, top_y), (70, 50), (70, 130, 200)),
+        ((170, top_y), (50, 40), (50, 100, 160)),
+        ((240, top_y), (60, 50), (70, 130, 200)),
+        ((320, top_y), (40, 35), (50, 100, 160)),
+        ((385, top_y), (55, 50), (160, 80, 60)),
+        ((465, top_y), (55, 50), (180, 60, 60)),
+        ((545, top_y), (80, 50), (200, 80, 80)),
+    ]
+    n_top = min(int(progress * 7) + 1, 7)  # 1..7 blocks visible for t >= 0
+    arrow_limit = 6  # blocks with index < 6 get an arrow; the 7th does not
+    _draw_pipeline_blocks(frame, blocks_classic, n_top, arrow_limit)
+
+    cross_phase = 0.6
+    if progress > cross_phase:  # cross out the span starting at the x=385 block
+        _draw_red_cross(frame, 385, 135, top_y, 50)
+
+    bot_y = 380
+    blocks_fcn = [
+        ((80, bot_y), (70, 50), (70, 130, 200)),
+        ((170, bot_y), (50, 40), (50, 100, 160)),
+        ((240, bot_y), (60, 50), (70, 130, 200)),
+        ((320, bot_y), (40, 35), (50, 100, 160)),
+        ((385, bot_y), (70, 50), (80, 200, 120)),
+        ((480, bot_y), (75, 50), (200, 160, 80)),
+        ((580, bot_y), (80, 50), (100, 200, 100)),
+    ]
+    fcn_phase = 0.4  # bottom row starts appearing past 40% progress
+    if progress > fcn_phase:
+        n_bot = min(int((progress - fcn_phase) / 0.6 * 7) + 1, 7)  # 0.6 == 1 - 0.4
+        _draw_pipeline_blocks(frame, blocks_fcn, n_bot, arrow_limit)
+
+    return frame
+
+
 def _fcn_demo() -> list[CompositeVideoClip]:
     """Animate FCN step-by-step: FC → Conv 1x1 transformation."""
-    slides = []
-
-    # Slide 1: Classic CNN vs FCN pipeline comparison
-    def make_fcn_frame(t: float) -> np.ndarray:
-        frame = np.zeros((H, W, 3), dtype=np.uint8)
-        frame[:] = BG_COLOR
-        progress = min(t / (STEP_DUR * 0.8), 1.0)
-
-        # TOP: Classic CNN → FC → 1 label
-        top_y = 140
-        blocks_classic = [
-            ((80, top_y), (70, 50), (70, 130, 200)),
-            ((170, top_y), (50, 40), (50, 100, 160)),
-            ((240, top_y), (60, 50), (70, 130, 200)),
-            ((320, top_y), (40, 35), (50, 100, 160)),
-            ((385, top_y), (55, 50), (160, 80, 60)),
-            ((465, top_y), (55, 50), (180, 60, 60)),
-            ((545, top_y), (80, 50), (200, 80, 80)),
-        ]
-        n_top = min(int(progress * 7) + 1, 7)
-        for i, ((bx, by), (bw, bh), color) in enumerate(blocks_classic):
-            if i < n_top:
-                frame[by : by + bh, bx : bx + bw] = color
-                frame[by : by + 2, bx : bx + bw] = tuple(
-                    min(c + 50, 255) for c in color
-                )
-                frame[by + bh - 2 : by + bh, bx : bx + bw] = tuple(
-                    min(c + 50, 255) for c in color
-                )
-                if i < 6:
-                    ax = bx + bw + 3
-                    ay = by + bh // 2
-                    frame[ay - 1 : ay + 2, ax : ax + 12] = (150, 150, 170)
-
-        # Red X over Flatten+FC when FCN appears
-        if progress > 0.6:
-            for d in range(-2, 3):
-                for step in range(50):
-                    x1 = 385 + int(step * 135 / 50)
-                    y1 = top_y + step + d
-                    if 0 <= y1 < H and 0 <= x1 < W:
-                        frame[y1, x1] = (255, 80, 80)
-                    y2 = top_y + 50 - step + d
-                    if 0 <= y2 < H and 0 <= x1 < W:
-                        frame[y2, x1] = (255, 80, 80)
-
-        # BOTTOM: FCN pipeline
-        bot_y = 380
-        blocks_fcn = [
-            ((80, bot_y), (70, 50), (70, 130, 200)),
-            ((170, bot_y), (50, 40), (50, 100, 160)),
-            ((240, bot_y), (60, 50), (70, 130, 200)),
-            ((320, bot_y), (40, 35), (50, 100, 160)),
-            ((385, bot_y), (70, 50), (80, 200, 120)),
-            ((480, bot_y), (75, 50), (200, 160, 80)),
-            ((580, bot_y), (80, 50), (100, 200, 100)),
-        ]
-        if progress > 0.4:
-            n_bot = min(int((progress - 0.4) / 0.6 * 7) + 1, 7)
-            for i, ((bx, by), (bw, bh), color) in enumerate(blocks_fcn):
-                if i < n_bot:
-                    frame[by : by + bh, bx : bx + bw] = color
-                    frame[by : by + 2, bx : bx + bw] = tuple(
-                        min(c + 50, 255) for c in color
-                    )
-                    frame[by + bh - 2 : by + bh, bx : bx + bw] = tuple(
-                        min(c + 50, 255) for c in color
-                    )
-                    if i < 6:
-                        ax = bx + bw + 3
-                        ay = by + bh // 2
-                        frame[ay - 1 : ay + 2, ax : ax + 12] = (150, 150, 170)
-
-        return frame
-
-    fcn_clip = VideoClip(make_fcn_frame, duration=STEP_DUR + 1).with_fps(FPS)
     dur = STEP_DUR + 1
+    fcn_clip = VideoClip(_make_fcn_frame, duration=dur).with_fps(FPS)
     labels = [
         ("FCN: Fully Convolutional Network (2015)", 26, "#FFE082", FONT_B, (80, 20)),
         ("KROK 1: Zamień FC → Conv 1x1", 18, "#A5D6A7", FONT_R, (80, 60)),
@@ -807,19 +831,7 @@ def _fcn_demo() -> list[CompositeVideoClip]:
             (80, 640),
         ),
     ]
-    text_clips: list[VideoClip] = [fcn_clip]
-    for text, fs, color, font, pos in labels:
-        tc = (
-            _tc(text=text, font_size=fs, color=color, font=font)
-            .with_duration(dur)
-            .with_position(pos)
-        )
-        text_clips.append(tc)
-    slides.append(
-        CompositeVideoClip(text_clips, size=(W, H)).with_effects(
-            [FadeIn(0.3), FadeOut(0.3)]
-        )
-    )
+    slides = [_compose_slide(fcn_clip, labels, dur)]
 
     # Slide 2: FCN skip connections step by step
     skip_lines = [
@@ -909,7 +921,8 @@ def _fcn_demo() -> list[CompositeVideoClip]:
             (100, 555),
         ),
         (
-            "Im więcej skip connections → tym więcej detali z encodera → ostrzejszy wynik",
+            "Im więcej skip connections → tym więcej "
+            "detali z encodera → ostrzejszy wynik",
             17,
             "white",
             FONT_R,
@@ -922,90 +935,134 @@ def _fcn_demo() -> list[CompositeVideoClip]:
 
 
 # ── DeepLab Architecture ─────────────────────────────────────────
-def _deeplab_demo() -> list[CompositeVideoClip]:
-    """Animate DeepLab: dilated convolution + ASPP step by step."""
-    slides = []
+def _make_dilated_frame(t: float) -> np.ndarray:
+    """Render a dilated convolution comparison frame."""
+    frame = np.zeros((H, W, 3), dtype=np.uint8)
+    frame[:] = BG_COLOR
+    progress = min(t / (STEP_DUR * 0.7), 1.0)
 
-    # Slide 1: Regular vs Dilated convolution
-    def make_dilated_frame(t: float) -> np.ndarray:
-        frame = np.zeros((H, W, 3), dtype=np.uint8)
-        frame[:] = BG_COLOR
-        progress = min(t / (STEP_DUR * 0.7), 1.0)
+    cell = 36
+    grids = [
+        (
+            "rate=1",
+            60,
+            [
+                (0, 0),
+                (0, 1),
+                (0, 2),
+                (1, 0),
+                (1, 1),
+                (1, 2),
+                (2, 0),
+                (2, 1),
+                (2, 2),
+            ],
+        ),
+        (
+            "rate=2",
+            420,
+            [
+                (0, 0),
+                (0, 2),
+                (0, 4),
+                (2, 0),
+                (2, 2),
+                (2, 4),
+                (4, 0),
+                (4, 2),
+                (4, 4),
+            ],
+        ),
+        (
+            "rate=3",
+            820,
+            [
+                (0, 0),
+                (0, 3),
+                (0, 6),
+                (3, 0),
+                (3, 3),
+                (3, 6),
+                (6, 0),
+                (6, 3),
+                (6, 6),
+            ],
+        ),
+    ]
 
-        cell = 36
-        # Draw three grids side by side for rate=1, rate=2, rate=3
-        grids = [
-            (
-                "rate=1",
-                60,
-                [
-                    (0, 0),
-                    (0, 1),
-                    (0, 2),
-                    (1, 0),
-                    (1, 1),
-                    (1, 2),
-                    (2, 0),
-                    (2, 1),
-                    (2, 2),
-                ],
-            ),
-            (
-                "rate=2",
-                420,
-                [
-                    (0, 0),
-                    (0, 2),
-                    (0, 4),
-                    (2, 0),
-                    (2, 2),
-                    (2, 4),
-                    (4, 0),
-                    (4, 2),
-                    (4, 4),
-                ],
-            ),
-            (
-                "rate=3",
-                820,
-                [
-                    (0, 0),
-                    (0, 3),
-                    (0, 6),
-                    (3, 0),
-                    (3, 3),
-                    (3, 6),
-                    (6, 0),
-                    (6, 3),
-                    (6, 6),
-                ],
-            ),
-        ]
-
-        for gi, (_label, gx, positions) in enumerate(grids):
-            if progress < gi * 0.3:
-                break
-            gy = 180
-            grid_size = 7
-            # Draw background grid
-            for r in range(grid_size):
-                for c in range(grid_size):
-                    x = gx + c * cell
-                    y = gy + r * cell
-                    frame[y : y + cell - 2, x : x + cell - 2] = (35, 40, 55)
-
-            # Highlight filter positions
-            for r, c in positions:
+    for gi, (_label, gx, positions) in enumerate(grids):
+        if progress < gi * 0.3:
+            break
+        gy = 180
+        grid_size = 7
+        for r in range(grid_size):
+            for c in range(grid_size):
                 x = gx + c * cell
                 y = gy + r * cell
-                frame[y : y + cell - 2, x : x + cell - 2] = (70, 130, 200)
-                frame[y : y + 2, x : x + cell - 2] = (120, 180, 255)
-                frame[y + cell - 4 : y + cell - 2, x : x + cell - 2] = (120, 180, 255)
+                frame[y : y + cell - 2, x : x + cell - 2] = (35, 40, 55)
+        for r, c in positions:
+            x = gx + c * cell
+            y = gy + r * cell
+            frame[y : y + cell - 2, x : x + cell - 2] = (70, 130, 200)
+            frame[y : y + 2, x : x + cell - 2] = (120, 180, 255)
+            frame[y + cell - 4 : y + cell - 2, x : x + cell - 2] = (120, 180, 255)
 
-        return frame
+    return frame
 
-    dil_clip = VideoClip(make_dilated_frame, duration=STEP_DUR + 1).with_fps(FPS)
+
+def _make_aspp_frame(t: float) -> np.ndarray:
+    """Render a single ASPP module animation frame."""
+    frame = np.zeros((H, W, 3), dtype=np.uint8)
+    frame[:] = BG_COLOR
+    progress = min(t / (STEP_DUR * 0.7), 1.0)
+
+    frame[250:330, 50:130] = (70, 130, 200)
+    frame[250:252, 50:130] = (120, 180, 255)
+    frame[328:330, 50:130] = (120, 180, 255)
+
+    branches = [
+        ("1x1 conv", 250, (200, 170), (100, 40), (80, 200, 120)),
+        ("rate=6", 310, (200, 250), (100, 40), (200, 160, 80)),
+        ("rate=12", 370, (200, 330), (100, 40), (200, 120, 60)),
+        ("rate=18", 430, (200, 410), (100, 40), (180, 100, 80)),
+        ("GAP", 490, (200, 490), (100, 40), (160, 80, 160)),
+    ]
+    n_branches = min(int(progress * 5) + 1, 5)
+    for i, (_lbl, _h, (bx, by), (bw, bh), color) in enumerate(branches):
+        if i < n_branches:
+            frame[by : by + bh, bx : bx + bw] = color
+            frame[by : by + 2, bx : bx + bw] = tuple(
+                min(c + 50, 255) for c in color
+            )
+            ay = by + bh // 2
+            frame[ay - 1 : ay + 2, 133:197] = (150, 150, 170)
+
+    concat_phase = 0.6
+    if progress > concat_phase:
+        frame[250:530, 380:420] = (50, 60, 80)
+        frame[250:252, 380:420] = (200, 200, 100)
+        frame[528:530, 380:420] = (200, 200, 100)
+        for i, (_lbl, _h, (bx, by), (bw, bh), _c) in enumerate(branches):
+            if i < n_branches:
+                ay = by + bh // 2
+                frame[ay - 1 : ay + 2, bx + bw + 3 : 378] = (150, 150, 170)
+
+    final_conv_phase = 0.8
+    if progress > final_conv_phase:
+        frame[350:420, 450:550] = (100, 200, 100)
+        frame[350:352, 450:550] = (150, 230, 150)
+        frame[418:420, 450:550] = (150, 230, 150)
+        frame[388:391, 423:448] = (150, 150, 170)
+
+    return frame
+
+
+def _deeplab_demo() -> list[CompositeVideoClip]:
+    """Animate DeepLab: dilated convolution + ASPP step by step."""
     dur = STEP_DUR + 1
+
+    # Slide 1: Regular vs Dilated convolution
+    dil_clip = VideoClip(_make_dilated_frame, duration=dur).with_fps(FPS)
     labels = [
         ("DeepLab: Atrous (Dilated) Convolution", 26, "#FFE082", FONT_B, (80, 20)),
         (
@@ -1032,7 +1089,8 @@ def _deeplab_demo() -> list[CompositeVideoClip]:
             (80, 510),
         ),
         (
-            "TE SAME 9 wag → WIĘKSZE pole widzenia → lepszy kontekst BEZ dodatkowych parametrów!",
+            "TE SAME 9 wag → WIĘKSZE pole widzenia "
+            "→ lepszy kontekst BEZ dodatkowych parametrów!",
             16,
             "white",
             FONT_R,
@@ -1046,72 +1104,10 @@ def _deeplab_demo() -> list[CompositeVideoClip]:
             (80, 600),
         ),
     ]
-    text_clips: list[VideoClip] = [dil_clip]
-    for text, fs, color, font, pos in labels:
-        tc = (
-            _tc(text=text, font_size=fs, color=color, font=font)
-            .with_duration(dur)
-            .with_position(pos)
-        )
-        text_clips.append(tc)
-    slides.append(
-        CompositeVideoClip(text_clips, size=(W, H)).with_effects(
-            [FadeIn(0.3), FadeOut(0.3)]
-        )
-    )
+    slides = [_compose_slide(dil_clip, labels, dur)]
 
     # Slide 2: ASPP module step by step
-    def make_aspp_frame(t: float) -> np.ndarray:
-        frame = np.zeros((H, W, 3), dtype=np.uint8)
-        frame[:] = BG_COLOR
-        progress = min(t / (STEP_DUR * 0.7), 1.0)
-
-        # Input feature map on left
-        frame[250:330, 50:130] = (70, 130, 200)
-        frame[250:252, 50:130] = (120, 180, 255)
-        frame[328:330, 50:130] = (120, 180, 255)
-
-        # ASPP parallel branches
-        branches = [
-            ("1x1 conv", 250, (200, 170), (100, 40), (80, 200, 120)),
-            ("rate=6", 310, (200, 250), (100, 40), (200, 160, 80)),
-            ("rate=12", 370, (200, 330), (100, 40), (200, 120, 60)),
-            ("rate=18", 430, (200, 410), (100, 40), (180, 100, 80)),
-            ("GAP", 490, (200, 490), (100, 40), (160, 80, 160)),
-        ]
-        n_branches = min(int(progress * 5) + 1, 5)
-        for i, (_lbl, _h, (bx, by), (bw, bh), color) in enumerate(branches):
-            if i < n_branches:
-                frame[by : by + bh, bx : bx + bw] = color
-                frame[by : by + 2, bx : bx + bw] = tuple(
-                    min(c + 50, 255) for c in color
-                )
-                # Arrow from input
-                ay = by + bh // 2
-                frame[ay - 1 : ay + 2, 133:197] = (150, 150, 170)
-
-        # Concatenation box
-        if progress > 0.6:
-            frame[250:530, 380:420] = (50, 60, 80)
-            frame[250:252, 380:420] = (200, 200, 100)
-            frame[528:530, 380:420] = (200, 200, 100)
-            # Arrows from branches to concat
-            for i, (_lbl, _h, (bx, by), (bw, bh), _c) in enumerate(branches):
-                if i < n_branches:
-                    ay = by + bh // 2
-                    frame[ay - 1 : ay + 2, bx + bw + 3 : 378] = (150, 150, 170)
-
-        # Final conv after concat
-        if progress > 0.8:
-            frame[350:420, 450:550] = (100, 200, 100)
-            frame[350:352, 450:550] = (150, 230, 150)
-            frame[418:420, 450:550] = (150, 230, 150)
-            # Arrow from concat
-            frame[388:391, 423:448] = (150, 150, 170)
-
-        return frame
-
-    aspp_clip = VideoClip(make_aspp_frame, duration=STEP_DUR + 1).with_fps(FPS)
+    aspp_clip = VideoClip(_make_aspp_frame, duration=dur).with_fps(FPS)
     labels2 = [
         (
             "DeepLab: ASPP (Atrous Spatial Pyramid Pooling)",
@@ -1163,112 +1159,122 @@ def _deeplab_demo() -> list[CompositeVideoClip]:
             (80, 645),
         ),
     ]
-    text_clips2: list[VideoClip] = [aspp_clip]
-    for text, fs, color, font, pos in labels2:
-        tc = (
-            _tc(text=text, font_size=fs, color=color, font=font)
-            .with_duration(dur)
-            .with_position(pos)
-        )
-        text_clips2.append(tc)
-    slides.append(
-        CompositeVideoClip(text_clips2, size=(W, H)).with_effects(
-            [FadeIn(0.3), FadeOut(0.3)]
-        )
-    )
+    slides.append(_compose_slide(aspp_clip, labels2, dur))
 
     return slides
 
 
 # ── Transformer Segmentation ────────────────────────────────────
+def _draw_base_grid(
+    frame: np.ndarray, gx: int, gy: int, grid_n: int, cell: int,
+) -> None:
+    """Draw an empty grid of cells."""
+    for r in range(grid_n):
+        for c in range(grid_n):
+            x = gx + c * cell
+            y = gy + r * cell
+            frame[y : y + cell - 2, x : x + cell - 2] = (35, 40, 55)
+
+
+def _draw_cnn_kernel(
+    frame: np.ndarray, lx: int, ly: int, cell: int, progress: float,
+) -> None:
+    """Highlight a 3x3 CNN kernel on the grid."""
+    cnn_phase = 0.2
+    if progress <= cnn_phase:
+        return
+    cx, cy = 2, 2
+    for dr in range(-1, 2):
+        for dc in range(-1, 2):
+            r, c = cy + dr, cx + dc
+            x = lx + c * cell
+            y = ly + r * cell
+            frame[y : y + cell - 2, x : x + cell - 2] = (70, 130, 200)
+    x = lx + cx * cell
+    y = ly + cy * cell
+    frame[y : y + cell - 2, x : x + cell - 2] = (120, 180, 255)
+
+
+def _draw_conn_line(
+    frame: np.ndarray, x0: int, y0: int, x1: int, y1: int,
+) -> None:
+    """Draw a dashed connection line between two points."""
+    steps = max(abs(x1 - x0), abs(y1 - y0))
+    if steps <= 0:
+        return
+    for s in range(0, steps, 3):
+        px = x0 + int((x1 - x0) * s / steps)
+        py = y0 + int((y1 - y0) * s / steps)
+        if 0 <= px < W - 1 and 0 <= py < H - 1:
+            frame[py : py + 1, px : px + 1] = (200, 180, 50)
+
+
+def _draw_attention_connections(
+    frame: np.ndarray,
+    origin: tuple[int, int],
+    grid_n: int,
+    cell: int,
+    progress: float,
+) -> None:
+    """Draw transformer self-attention connections on the grid."""
+    rx, ry = origin
+    transformer_phase = 0.4
+    if progress <= transformer_phase:
+        return
+    cx_t, cy_t = 2, 2
+    x0 = rx + cx_t * cell + cell // 2
+    y0 = ry + cy_t * cell + cell // 2
+    n_connections = int(progress * 36)
+    conn_idx = 0
+    for r in range(grid_n):
+        for c in range(grid_n):
+            conn_idx += 1
+            if conn_idx > n_connections:
+                break
+            x = rx + c * cell
+            y = ry + r * cell
+            dist = abs(r - cy_t) + abs(c - cx_t)
+            strength = max(30, 200 - dist * 30)
+            frame[y : y + cell - 2, x : x + cell - 2] = (
+                strength // 3,
+                strength // 2,
+                strength,
+            )
+            _draw_conn_line(frame, x0, y0, x + cell // 2, y + cell // 2)
+        else:
+            continue
+        break
+    x = rx + cx_t * cell
+    y = ry + cy_t * cell
+    frame[y : y + cell - 2, x : x + cell - 2] = (255, 200, 50)
+
+
+def _make_attention_frame(t: float) -> np.ndarray:
+    """Render a CNN-vs-Transformer attention comparison frame."""
+    frame = np.zeros((H, W, 3), dtype=np.uint8)
+    frame[:] = BG_COLOR
+    progress = min(t / (STEP_DUR * 0.7), 1.0)
+
+    cell = 40
+    grid_n = 6
+
+    lx, ly = 60, 200
+    _draw_base_grid(frame, lx, ly, grid_n, cell)
+    _draw_cnn_kernel(frame, lx, ly, cell, progress)
+
+    rx, ry = 680, 200
+    _draw_base_grid(frame, rx, ry, grid_n, cell)
+    _draw_attention_connections(frame, (rx, ry), grid_n, cell, progress)
+
+    return frame
+
+
 def _transformer_seg_demo() -> list[CompositeVideoClip]:
     """Animate transformer-based segmentation: self-attention concept."""
-    slides = []
+    dur = STEP_DUR + 1
 
     # Slide 1: CNN local vs Transformer global
-    def make_attention_frame(t: float) -> np.ndarray:
-        frame = np.zeros((H, W, 3), dtype=np.uint8)
-        frame[:] = BG_COLOR
-        progress = min(t / (STEP_DUR * 0.7), 1.0)
-
-        cell = 40
-        grid_n = 6
-
-        # LEFT: CNN — local receptive field
-        lx, ly = 60, 200
-        for r in range(grid_n):
-            for c in range(grid_n):
-                x = lx + c * cell
-                y = ly + r * cell
-                frame[y : y + cell - 2, x : x + cell - 2] = (35, 40, 55)
-
-        # Highlight 3x3 kernel in CNN
-        if progress > 0.2:
-            cx, cy = 2, 2  # center cell
-            for dr in range(-1, 2):
-                for dc in range(-1, 2):
-                    r, c = cy + dr, cx + dc
-                    x = lx + c * cell
-                    y = ly + r * cell
-                    frame[y : y + cell - 2, x : x + cell - 2] = (70, 130, 200)
-            # Center highlighted more
-            x = lx + cx * cell
-            y = ly + cy * cell
-            frame[y : y + cell - 2, x : x + cell - 2] = (120, 180, 255)
-
-        # RIGHT: Transformer — global attention
-        rx, ry = 680, 200
-        for r in range(grid_n):
-            for c in range(grid_n):
-                x = rx + c * cell
-                y = ry + r * cell
-                frame[y : y + cell - 2, x : x + cell - 2] = (35, 40, 55)
-
-        # All cells connected to center
-        if progress > 0.4:
-            cx_t, cy_t = 2, 2
-            # Center cell
-            x0 = rx + cx_t * cell + cell // 2
-            y0 = ry + cy_t * cell + cell // 2
-            n_connections = int(progress * 36)
-            conn_idx = 0
-            for r in range(grid_n):
-                for c in range(grid_n):
-                    conn_idx += 1
-                    if conn_idx > n_connections:
-                        break
-                    x = rx + c * cell
-                    y = ry + r * cell
-                    # Color by "attention strength" — closer = stronger
-                    dist = abs(r - cy_t) + abs(c - cx_t)
-                    strength = max(30, 200 - dist * 30)
-                    frame[y : y + cell - 2, x : x + cell - 2] = (
-                        strength // 3,
-                        strength // 2,
-                        strength,
-                    )
-                    # Draw connection line
-                    x1 = x + cell // 2
-                    y1 = y + cell // 2
-                    steps = max(abs(x1 - x0), abs(y1 - y0))
-                    if steps > 0:
-                        for s in range(0, steps, 3):
-                            px = x0 + int((x1 - x0) * s / steps)
-                            py = y0 + int((y1 - y0) * s / steps)
-                            if 0 <= px < W - 1 and 0 <= py < H - 1:
-                                frame[py : py + 1, px : px + 1] = (200, 180, 50)
-                else:
-                    continue
-                break
-            # Center highlighted strongly
-            x = rx + cx_t * cell
-            y = ry + cy_t * cell
-            frame[y : y + cell - 2, x : x + cell - 2] = (255, 200, 50)
-
-        return frame
-
-    att_clip = VideoClip(make_attention_frame, duration=STEP_DUR + 1).with_fps(FPS)
-    dur = STEP_DUR + 1
+    att_clip = VideoClip(_make_attention_frame, duration=dur).with_fps(FPS)
     labels = [
         ("Transformer: Self-Attention w segmentacji", 26, "#FFE082", FONT_B, (80, 20)),
         ("CNN = LOKALNY kontekst", 18, "#64B5F6", FONT_B, (60, 160)),
@@ -1279,19 +1285,7 @@ def _transformer_seg_demo() -> list[CompositeVideoClip]:
         ("piksel widzi WSZYSTKIE!", 14, "#FFE082", FONT_R, (680, 485)),
         ("vs", 28, "#B0BEC5", FONT_B, (450, 300)),
     ]
-    text_clips: list[VideoClip] = [att_clip]
-    for text, fs, color, font, pos in labels:
-        tc = (
-            _tc(text=text, font_size=fs, color=color, font=font)
-            .with_duration(dur)
-            .with_position(pos)
-        )
-        text_clips.append(tc)
-    slides.append(
-        CompositeVideoClip(text_clips, size=(W, H)).with_effects(
-            [FadeIn(0.3), FadeOut(0.3)]
-        )
-    )
+    slides = [_compose_slide(att_clip, labels, dur)]
 
     # Slide 2: Self-attention Q/K/V step by step
     qkv_lines = [
@@ -1376,7 +1370,8 @@ def _transformer_seg_demo() -> list[CompositeVideoClip]:
             (100, 610),
         ),
         (
-            "Mask2Former (2022): masked attention + unified (semantic+instance+panoptic)",
+            "Mask2Former (2022): masked attention + "
+            "unified (semantic+instance+panoptic)",
             16,
             "#CE93D8",
             FONT_R,
@@ -1520,12 +1515,16 @@ def _methods_comparison() -> CompositeVideoClip:
     ]
 
     clips: list[VideoClip] = [bg, title]
+    mnemonic_col = 3
     for i, row in enumerate(rows):
         y_pos = 75 + i * 72
         col_x = [40, 210, 340, 660]
         for j, cell in enumerate(row):
             fs = 16 if i > 0 else 18
-            color = "#64B5F6" if i == 0 else ("#E0E0E0" if j < 3 else "#FFE082")
+            color = (
+                "#64B5F6" if i == 0
+                else ("#E0E0E0" if j < mnemonic_col else "#FFE082")
+            )
             tc = (
                 _tc(
                     text=cell,
@@ -1620,7 +1619,7 @@ def main() -> None:
     final.write_videofile(
         OUTPUT, fps=FPS, codec="libx264", audio=False, preset="medium", threads=4
     )
-    print(f"Video saved to: {OUTPUT}")
+    _logger.info("Video saved to: %s", OUTPUT)
 
 
 if __name__ == "__main__":
diff --git a/python_pkg/praca_magisterska_video/visualize_q24.py b/python_pkg/praca_magisterska_video/visualize_q24.py
index d5c380a..5e3f1b5 100644
--- a/python_pkg/praca_magisterska_video/visualize_q24.py
+++ b/python_pkg/praca_magisterska_video/visualize_q24.py
@@ -11,6 +11,7 @@ Creates animated video demonstrating:
 
 from __future__ import annotations
 
+import logging
 import os
 from pathlib import Path
 
@@ -40,6 +41,8 @@ OUTPUT = str(OUTPUT_DIR / "q24_object_detection.mp4")
 
 BG_COLOR = (15, 20, 35)
 
+_logger = logging.getLogger(__name__)
+
 
 def _tc(**kwargs: object) -> TextClip:
     """TextClip wrapper that adds enough bottom margin to prevent clipping."""
@@ -203,7 +206,8 @@ def _hog_svm_demo() -> list[CompositeVideoClip]:
                     frame[ay - 1 : ay + 2, ax : ax + 20] = (150, 150, 170)
 
         # Show gradient computation example at bottom
-        if progress > 0.2:
+        gradient_phase = 0.2
+        if progress > gradient_phase:
             # Mini pixel grid showing gradient computation
             gx, gy = 100, 430
             pixels = [50, 50, 200]
@@ -366,7 +370,8 @@ def _viola_jones_demo() -> list[CompositeVideoClip]:
             (80, 620),
         ),
         (
-            "Haar: kontrast jasna/ciemna | Integral Image: suma prostokąta O(1) = 4 odczyty",
+            "Haar: kontrast jasna/ciemna | Integral Image: "
+            "suma prostokąta O(1) = 4 odczyty",
             14,
             "#78909C",
             FONT_R,
@@ -474,7 +479,8 @@ def _rcnn_evolution() -> list[CompositeVideoClip]:
         ("Faster R-CNN (2015)", 20, "#A5D6A7", FONT_B, (50, 580)),
         ("0.2 sec → 5 fps (RPN w sieci!)", 14, "#A5D6A7", FONT_R, (720, 600)),
         (
-            "Kluczowe innowacje: ROI Pooling → stały rozmiar | RPN → propozycje w sieci",
+            "Kluczowe innowacje: ROI Pooling → stały rozmiar "
+            "| RPN → propozycje w sieci",
             14,
             "#78909C",
             FONT_R,
@@ -527,13 +533,15 @@ def _rcnn_detailed() -> list[CompositeVideoClip]:
                     min(c + 50, 255) for c in color
                 )
                 # Arrow down
-                if i < 4:
+                arrow_limit = 4
+                if i < arrow_limit:
                     ax = bx + bw // 2
                     ay = by + bh + 5
                     frame[ay : ay + 20, ax - 1 : ax + 2] = (150, 150, 170)
 
         # Illustration: many overlapping regions from Selective Search
-        if progress > 0.2:
+        overlay_phase = 0.2
+        if progress > overlay_phase:
             rng_local = np.random.default_rng(42)
             n_boxes = min(int((progress - 0.2) * 15), 8)
             for i in range(n_boxes):
@@ -599,94 +607,108 @@ def _rcnn_detailed() -> list[CompositeVideoClip]:
 
 
 # ── ROI Pooling ──────────────────────────────────────────────────
+
+
+def _draw_roi_pool_grid(frame: np.ndarray) -> None:
+    """Draw the 3x3 ROI pool grid with max-pooled feature values."""
+    out_x, out_y = 400, 220
+    out_cell = 50
+    out_n = 3
+    roi_r1, roi_c1 = 2, 1
+    roi_r2, roi_c2 = 6, 5
+    roi_h = roi_r2 - roi_r1
+    roi_w = roi_c2 - roi_c1
+    for r in range(out_n):
+        for c in range(out_n):
+            x = out_x + c * out_cell
+            y = out_y + r * out_cell
+
+            # Compute the max from corresponding region
+            src_r1 = roi_r1 + r * roi_h // out_n
+            src_r2 = roi_r1 + (r + 1) * roi_h // out_n
+            src_c1 = roi_c1 + c * roi_w // out_n
+            src_c2 = roi_c1 + (c + 1) * roi_w // out_n
+            max_val = 0
+            for sr in range(src_r1, src_r2):
+                for sc in range(src_c1, src_c2):
+                    v = 30 + ((sr * 7 + sc * 13 + 42) % 40)
+                    max_val = max(max_val, v)
+
+            frame[y : y + out_cell - 2, x : x + out_cell - 2] = (
+                max_val,
+                max_val + 20,
+                max_val + 40,
+            )
+            frame[y : y + 2, x : x + out_cell - 2] = (80, 200, 120)
+            frame[y + out_cell - 4 : y + out_cell - 2, x : x + out_cell - 2] = (
+                80,
+                200,
+                120,
+            )
+
+
+def _make_roi_frame(t: float) -> np.ndarray:
+    """Render a single frame for the ROI pooling animation."""
+    frame = np.zeros((H, W, 3), dtype=np.uint8)
+    frame[:] = BG_COLOR
+    progress = min(t / (STEP_DUR * 0.7), 1.0)
+
+    # Left: feature map with ROI highlighted
+    fm_x, fm_y = 60, 180
+    fm_cell = 30
+    fm_grid = 8
+    for r in range(fm_grid):
+        for c in range(fm_grid):
+            x = fm_x + c * fm_cell
+            y = fm_y + r * fm_cell
+            # Random-looking feature values
+            val = 30 + ((r * 7 + c * 13 + 42) % 40)
+            frame[y : y + fm_cell - 1, x : x + fm_cell - 1] = (
+                val,
+                val + 10,
+                val + 20,
+            )
+
+    # ROI region highlighted
+    roi_r1, roi_c1 = 2, 1
+    roi_r2, roi_c2 = 6, 5
+    for tt in range(3):
+        ry1 = fm_y + roi_r1 * fm_cell - tt
+        ry2 = fm_y + roi_r2 * fm_cell + tt
+        rx1 = fm_x + roi_c1 * fm_cell - tt
+        rx2 = fm_x + roi_c2 * fm_cell + tt
+        frame[ry1:ry2, rx1 : rx1 + 2] = (255, 200, 50)
+        frame[ry1:ry2, rx2 - 2 : rx2] = (255, 200, 50)
+        frame[ry1 : ry1 + 2, rx1:rx2] = (255, 200, 50)
+        frame[ry2 - 2 : ry2, rx1:rx2] = (255, 200, 50)
+
+    # Arrow
+    arrow_phase = 0.3
+    if progress > arrow_phase:
+        frame[300:303, 310:380] = (150, 150, 170)
+
+    # Middle: ROI divided into 3x3 grid (output_size)
+    grid_phase = 0.3
+    if progress > grid_phase:
+        _draw_roi_pool_grid(frame)
+
+    # Arrow to FC
+    fc_phase = 0.6
+    if progress > fc_phase:
+        frame[300:303, 560:630] = (150, 150, 170)
+        # FC box
+        frame[270:340, 650:730] = (200, 100, 80)
+        frame[270:272, 650:730] = (240, 140, 120)
+        frame[338:340, 650:730] = (240, 140, 120)
+
+    return frame
+
+
 def _roi_pooling_demo() -> list[CompositeVideoClip]:
     """Animate ROI Pooling: key Fast R-CNN innovation."""
     slides = []
 
-    def make_roi_frame(t: float) -> np.ndarray:
-        frame = np.zeros((H, W, 3), dtype=np.uint8)
-        frame[:] = BG_COLOR
-        progress = min(t / (STEP_DUR * 0.7), 1.0)
-
-        # Left: feature map with ROI highlighted
-        fm_x, fm_y = 60, 180
-        fm_cell = 30
-        fm_grid = 8
-        for r in range(fm_grid):
-            for c in range(fm_grid):
-                x = fm_x + c * fm_cell
-                y = fm_y + r * fm_cell
-                # Random-looking feature values
-                val = 30 + ((r * 7 + c * 13 + 42) % 40)
-                frame[y : y + fm_cell - 1, x : x + fm_cell - 1] = (
-                    val,
-                    val + 10,
-                    val + 20,
-                )
-
-        # ROI region highlighted
-        roi_r1, roi_c1 = 2, 1
-        roi_r2, roi_c2 = 6, 5
-        for tt in range(3):
-            ry1 = fm_y + roi_r1 * fm_cell - tt
-            ry2 = fm_y + roi_r2 * fm_cell + tt
-            rx1 = fm_x + roi_c1 * fm_cell - tt
-            rx2 = fm_x + roi_c2 * fm_cell + tt
-            frame[ry1:ry2, rx1 : rx1 + 2] = (255, 200, 50)
-            frame[ry1:ry2, rx2 - 2 : rx2] = (255, 200, 50)
-            frame[ry1 : ry1 + 2, rx1:rx2] = (255, 200, 50)
-            frame[ry2 - 2 : ry2, rx1:rx2] = (255, 200, 50)
-
-        # Arrow
-        if progress > 0.3:
-            frame[300:303, 310:380] = (150, 150, 170)
-
-        # Middle: ROI divided into 3x3 grid (output_size)
-        if progress > 0.3:
-            out_x, out_y = 400, 220
-            out_cell = 50
-            out_n = 3
-            roi_h = roi_r2 - roi_r1
-            roi_w = roi_c2 - roi_c1
-            for r in range(out_n):
-                for c in range(out_n):
-                    x = out_x + c * out_cell
-                    y = out_y + r * out_cell
-
-                    # Compute the max from corresponding region
-                    src_r1 = roi_r1 + r * roi_h // out_n
-                    src_r2 = roi_r1 + (r + 1) * roi_h // out_n
-                    src_c1 = roi_c1 + c * roi_w // out_n
-                    src_c2 = roi_c1 + (c + 1) * roi_w // out_n
-                    max_val = 0
-                    for sr in range(src_r1, src_r2):
-                        for sc in range(src_c1, src_c2):
-                            v = 30 + ((sr * 7 + sc * 13 + 42) % 40)
-                            max_val = max(max_val, v)
-
-                    frame[y : y + out_cell - 2, x : x + out_cell - 2] = (
-                        max_val,
-                        max_val + 20,
-                        max_val + 40,
-                    )
-                    frame[y : y + 2, x : x + out_cell - 2] = (80, 200, 120)
-                    frame[y + out_cell - 4 : y + out_cell - 2, x : x + out_cell - 2] = (
-                        80,
-                        200,
-                        120,
-                    )
-
-        # Arrow to FC
-        if progress > 0.6:
-            frame[300:303, 560:630] = (150, 150, 170)
-            # FC box
-            frame[270:340, 650:730] = (200, 100, 80)
-            frame[270:272, 650:730] = (240, 140, 120)
-            frame[338:340, 650:730] = (240, 140, 120)
-
-        return frame
-
-    roi_clip = VideoClip(make_roi_frame, duration=STEP_DUR + 1).with_fps(FPS)
+    roi_clip = VideoClip(_make_roi_frame, duration=STEP_DUR + 1).with_fps(FPS)
     dur = STEP_DUR + 1
     labels = [
         ("ROI Pooling: kluczowa innowacja Fast R-CNN", 26, "#FFE082", FONT_B, (80, 20)),
@@ -731,7 +753,8 @@ def _roi_pooling_demo() -> list[CompositeVideoClip]:
             (80, 535),
         ),
         (
-            "Fast R-CNN: CNN raz → 1 feature mapa → ROI Pool 2000 regionów → 25x szybciej!",
+            "Fast R-CNN: CNN raz → 1 feature mapa → "
+            "ROI Pool 2000 regionów → 25x szybciej!",
             16,
             "#A5D6A7",
             FONT_R,
@@ -788,7 +811,6 @@ def _rpn_anchors_demo() -> list[CompositeVideoClip]:
 
         # Draw anchors around center: 3 sizes x 3 ratios = 9
         anchor_specs = [
-            # (half_w, half_h, color)
             (30, 30, (200, 80, 80)),  # small 1:1
             (20, 40, (200, 60, 60)),  # small 1:2
             (40, 20, (180, 60, 60)),  # small 2:1
@@ -1014,7 +1036,8 @@ def _yolo_demo() -> list[CompositeVideoClip]:
             frame[y : y + 1, img_x : img_x + img_size] = (100, 100, 120)
 
         # Highlight cells containing object centers
-        if progress > 0.3:
+        car_phase = 0.3
+        if progress > car_phase:
             # Car center ~ cell (1, 1)
             cx, cy = 1, 2
             hx = img_x + cx * cell
@@ -1023,7 +1046,8 @@ def _yolo_demo() -> list[CompositeVideoClip]:
                 frame[hy : hy + cell, hx : hx + cell].astype(int) + 40, 0, 255
             ).astype(np.uint8)
 
-        if progress > 0.5:
+        person_phase = 0.5
+        if progress > person_phase:
             # Person center ~ cell (4, 4)
             cx, cy = 4, 4
             hx = img_x + cx * cell
@@ -1033,7 +1057,8 @@ def _yolo_demo() -> list[CompositeVideoClip]:
             ).astype(np.uint8)
 
         # Bounding boxes predictions from cells
-        if progress > 0.6:
+        bbox_phase = 0.6
+        if progress > bbox_phase:
             # Car bbox
             for tt in range(2):
                 frame[
@@ -1100,7 +1125,8 @@ def _yolo_demo() -> list[CompositeVideoClip]:
             (80, 620),
         ),
         (
-            "Two-stage (R-CNN): propozycje+klasyfikacja | One-stage (YOLO): bez propozycji!",
+            "Two-stage (R-CNN): propozycje+klasyfikacja "
+            "| One-stage (YOLO): bez propozycji!",
             14,
             "#90CAF9",
             FONT_R,
@@ -1152,13 +1178,15 @@ def _yolo_architecture() -> list[CompositeVideoClip]:
                 frame[by + bh - 2 : by + bh, bx : bx + bw] = tuple(
                     min(c + 50, 255) for c in color
                 )
-                if i < 4:
+                arrow_limit = 4
+                if i < arrow_limit:
                     ax = bx + bw + 5
                     ay = by + bh // 2
                     frame[ay - 1 : ay + 2, ax : ax + 25] = (150, 150, 170)
 
         # Output tensor breakdown (right side)
-        if progress > 0.6:
+        tensor_phase = 0.6
+        if progress > tensor_phase:
             # Show SxS grid
             gx, gy = 850, 180
             gs = 120
@@ -1282,18 +1310,21 @@ def _detr_demo() -> list[CompositeVideoClip]:
                 frame[by + bh - 2 : by + bh, bx : bx + bw] = tuple(
                     min(c + 50, 255) for c in color
                 )
-                if i < 4:
+                arrow_limit = 4
+                if i < arrow_limit:
                     ax = bx + bw + 5
                     ay = by + bh // 2
                     frame[ay - 1 : ay + 2, ax : ax + 25] = (150, 150, 170)
 
         # Object queries illustration (right side)
-        if progress > 0.5:
+        query_phase = 0.5
+        if progress > query_phase:
             qx, qy = 800, 140
             for i in range(6):
                 y = qy + i * 50
                 w = 130
-                active = i < 3
+                active_limit = 3
+                active = i < active_limit
                 color = (80, 180, 120) if active else (60, 50, 50)
                 frame[y : y + 35, qx : qx + w] = color
                 frame[y : y + 1, qx : qx + w] = tuple(min(c + 40, 255) for c in color)
@@ -1528,7 +1559,8 @@ def _detr_demo() -> list[CompositeVideoClip]:
             (80, 540),
         ),
         (
-            "  R-CNN (SS+CNN+SVM+NMS) → YOLO (backbone+head+NMS) → DETR (backbone+transformer)",
+            "  R-CNN (SS+CNN+SVM+NMS) → YOLO "
+            "(backbone+head+NMS) → DETR (backbone+transformer)",
             14,
             "#90CAF9",
             FONT_R,
@@ -1572,15 +1604,18 @@ def _nms_iou_demo() -> list[CompositeVideoClip]:
         boxes.append((ox + 350, oy + 50, 100, 100, 0.40, (80, 180, 255)))
 
         for i, (bx, by, bw, bh, _conf, color) in enumerate(boxes):
-            if progress > 0.4 and i > 0 and i < 3:
+            dc = color
+            nms_phase = 0.4
+            nms_limit = 3
+            if progress > nms_phase and i > 0 and i < nms_limit:
                 # After NMS, these get removed (shown as faded/crossed)
-                color = (60, 40, 40)
+                dc = (60, 40, 40)
 
             for tt in range(2):
-                frame[by - tt : by + bh + tt, bx - tt : bx - tt + 2] = color
-                frame[by - tt : by + bh + tt, bx + bw + tt - 2 : bx + bw + tt] = color
-                frame[by - tt : by - tt + 2, bx - tt : bx + bw + tt] = color
-                frame[by + bh + tt - 2 : by + bh + tt, bx - tt : bx + bw + tt] = color
+                frame[by - tt : by + bh + tt, bx - tt : bx - tt + 2] = dc
+                frame[by - tt : by + bh + tt, bx + bw + tt - 2 : bx + bw + tt] = dc
+                frame[by - tt : by - tt + 2, bx - tt : bx + bw + tt] = dc
+                frame[by + bh + tt - 2 : by + bh + tt, bx - tt : bx + bw + tt] = dc
 
         # IoU visualization on right side
         iou_x, iou_y = 700, 200
@@ -1884,7 +1919,7 @@ def main() -> None:
     final.write_videofile(
         OUTPUT, fps=FPS, codec="libx264", audio=False, preset="medium", threads=4
     )
-    print(f"Video saved to: {OUTPUT}")
+    _logger.info("Video saved to: %s", OUTPUT)
 
 
 if __name__ == "__main__":