#!/usr/bin/env python3 """ Generate ALL diagrams for PYTANIE 20: Analityka danych strumieniowych. Monochrome, A4-printable PNGs (300 DPI). """ import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import matplotlib.patches as mpatches from matplotlib.patches import FancyBboxPatch import numpy as np import os DPI = 300 BG = 'white' LN = 'black' FS = 8 FS_TITLE = 11 FS_SMALL = 6.5 FS_LABEL = 9 OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'img') os.makedirs(OUTPUT_DIR, exist_ok=True) GRAY1 = '#E8E8E8' GRAY2 = '#D0D0D0' GRAY3 = '#B8B8B8' GRAY4 = '#F5F5F5' GRAY5 = '#C0C0C0' def draw_box(ax, x, y, w, h, text, fill='white', lw=1.2, fontsize=FS, fontweight='normal', ha='center', va='center', rounded=True, edgecolor=LN, linestyle='-'): if rounded: rect = FancyBboxPatch((x, y), w, h, boxstyle="round,pad=0.05", lw=lw, edgecolor=edgecolor, facecolor=fill, linestyle=linestyle) else: rect = mpatches.Rectangle((x, y), w, h, lw=lw, edgecolor=edgecolor, facecolor=fill, linestyle=linestyle) ax.add_patch(rect) ax.text(x + w/2, y + h/2, text, ha=ha, va=va, fontsize=fontsize, fontweight=fontweight, wrap=True) def draw_arrow(ax, x1, y1, x2, y2, lw=1.2, style='->', color=LN): ax.annotate("", xy=(x2, y2), xytext=(x1, y1), arrowprops=dict(arrowstyle=style, color=color, lw=lw)) def save_fig(fig, name): path = os.path.join(OUTPUT_DIR, name) fig.savefig(path, dpi=DPI, bbox_inches='tight', facecolor=BG, pad_inches=0.15) plt.close(fig) print(f" Saved: {path}") def draw_table(ax, headers, rows, x0, y0, col_widths, row_h=0.4, header_fill=GRAY2, row_fills=None, fontsize=FS, header_fontsize=None): if header_fontsize is None: header_fontsize = fontsize n_cols = len(headers) # Header cx = x0 for j, hdr in enumerate(headers): draw_box(ax, cx, y0, col_widths[j], row_h, hdr, fill=header_fill, fontsize=header_fontsize, fontweight='bold', rounded=False) cx += col_widths[j] # Rows for i, row in enumerate(rows): cy = y0 - (i + 1) * row_h cx = x0 fill = GRAY4 if (i % 2 == 0) else 'white' if row_fills and i < len(row_fills): fill = row_fills[i] for j, cell in enumerate(row): fw = 'bold' if j == 0 else 'normal' draw_box(ax, cx, cy, col_widths[j], row_h, cell, fill=fill, fontsize=fontsize, fontweight=fw, rounded=False) cx += col_widths[j] # ============================================================ # 1. Batch vs Streaming concept # ============================================================ def gen_batch_vs_streaming(): fig, axes = plt.subplots(2, 1, figsize=(9, 5)) fig.suptitle('Batch vs Streaming — dwa modele przetwarzania', fontsize=FS_TITLE, fontweight='bold') # Batch ax = axes[0] ax.set_xlim(0, 12) ax.set_ylim(0, 3) ax.set_aspect('auto') ax.axis('off') ax.set_title('BATCH (wsadowe)', fontsize=FS_LABEL, fontweight='bold') # Data collected draw_box(ax, 0.5, 0.8, 3.0, 1.4, 'Zbierz WSZYSTKIE\ndane\n(godziny / dni)', fill=GRAY1, fontsize=FS, fontweight='bold') draw_arrow(ax, 3.5, 1.5, 4.5, 1.5, lw=2) draw_box(ax, 4.5, 0.8, 2.5, 1.4, 'Analiza\n(batch job)', fill=GRAY2, fontsize=FS, fontweight='bold') draw_arrow(ax, 7.0, 1.5, 8.0, 1.5, lw=2) draw_box(ax, 8.0, 0.8, 2.5, 1.4, 'Wynik\n(jednorazowy)', fill=GRAY3, fontsize=FS, fontweight='bold') ax.text(11.0, 1.5, 'min-h', fontsize=FS, va='center', fontweight='bold') # Streaming ax = axes[1] ax.set_xlim(0, 12) ax.set_ylim(0, 3) ax.set_aspect('auto') ax.axis('off') ax.set_title('STREAMING (strumieniowe)', fontsize=FS_LABEL, fontweight='bold') # Events flowing events_x = [0.5, 1.5, 2.5, 3.5] for i, ex in enumerate(events_x): draw_box(ax, ex, 1.0, 0.8, 0.8, f'e{i+1}', fill=GRAY4, fontsize=FS, fontweight='bold', rounded=False) if i < len(events_x) - 1: draw_arrow(ax, ex + 0.8, 1.4, ex + 1.0, 1.4, lw=1) ax.text(4.8, 1.4, '...', fontsize=FS_LABEL, va='center') draw_arrow(ax, 5.2, 1.4, 5.8, 1.4, lw=2) draw_box(ax, 5.8, 0.8, 2.8, 1.4, 'Analiza\nCIĄGŁA\n(event-by-event)', fill=GRAY2, fontsize=FS, fontweight='bold') draw_arrow(ax, 8.6, 1.5, 9.3, 1.5, lw=2) draw_box(ax, 9.3, 0.8, 2.0, 1.4, 'Wyniki\nciągłe', fill=GRAY3, fontsize=FS, fontweight='bold') ax.text(11.5, 0.5, 'ms-s', fontsize=FS, va='center', fontweight='bold') # Arrow marking infinity ax.annotate('', xy=(0.2, 1.4), xytext=(-0.3, 1.4), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) ax.text(0.0, 2.3, '∞ zdarzeń', fontsize=FS_SMALL, ha='center', style='italic') fig.tight_layout(rect=[0, 0, 1, 0.92]) save_fig(fig, 'q20_batch_vs_streaming.png') # ============================================================ # 2. All 4 window types (TSSG) # ============================================================ def gen_window_types(): fig, axes = plt.subplots(4, 1, figsize=(9, 10)) fig.suptitle('4 typy okien — TSSG', fontsize=FS_TITLE, fontweight='bold') # Events on a timeline (shared concept) events = list(range(1, 13)) # --- Tumbling --- ax = axes[0] ax.set_xlim(0, 14) ax.set_ylim(0, 4) ax.set_aspect('auto') ax.axis('off') ax.set_title('Tumbling Window (okno przerzutne) — rozłączne, stały rozmiar', fontsize=FS_LABEL, fontweight='bold') # Time axis ax.annotate('', xy=(13.5, 1.0), xytext=(0.3, 1.0), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) ax.text(13.5, 0.6, 'czas', fontsize=FS_SMALL, ha='center') # Events for i, e in enumerate(events): x = 1.0 + i * 1.0 ax.plot(x, 1.0, 'ko', markersize=5) ax.text(x, 0.5, f'e{e}', fontsize=FS_SMALL, ha='center') # Windows colors_w = [GRAY1, GRAY3, GRAY1, GRAY3] for w in range(4): x_start = 1.0 + w * 3.0 - 0.3 rect = mpatches.FancyBboxPatch((x_start, 1.5), 3.0, 1.2, boxstyle="round,pad=0.1", facecolor=colors_w[w], edgecolor=LN, lw=1.5) ax.add_patch(rect) ax.text(x_start + 1.5, 2.1, f'Okno {w+1}', fontsize=FS, ha='center', fontweight='bold') # Braces down to events for j in range(3): ex = 1.0 + w * 3.0 + j * 1.0 ax.plot([ex, ex], [1.0, 1.5], color=LN, lw=0.8, linestyle='--') ax.text(7.0, 3.2, 'Każde zdarzenie → DOKŁADNIE 1 okno. Zero nakładania.', fontsize=FS, ha='center', style='italic', bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)) # --- Sliding --- ax = axes[1] ax.set_xlim(0, 14) ax.set_ylim(0, 5) ax.set_aspect('auto') ax.axis('off') ax.set_title('Sliding Window (okno przesuwne) — nakładające, stały rozmiar + krok', fontsize=FS_LABEL, fontweight='bold') ax.annotate('', xy=(13.5, 1.0), xytext=(0.3, 1.0), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) ax.text(13.5, 0.6, 'czas', fontsize=FS_SMALL, ha='center') for i, e in enumerate(events[:8]): x = 1.0 + i * 1.0 ax.plot(x, 1.0, 'ko', markersize=5) ax.text(x, 0.5, f'e{e}', fontsize=FS_SMALL, ha='center') # Sliding windows: size=4, slide=2 slide_colors = [GRAY1, GRAY2, GRAY3] for w in range(3): x_start = 0.7 + w * 2.0 y_base = 1.5 + w * 0.9 rect = mpatches.FancyBboxPatch((x_start, y_base), 4.0, 0.7, boxstyle="round,pad=0.08", facecolor=slide_colors[w], edgecolor=LN, lw=1.5, alpha=0.7) ax.add_patch(rect) ax.text(x_start + 2.0, y_base + 0.35, f'Okno {w+1} (size=4)', fontsize=FS_SMALL, ha='center', fontweight='bold') ax.text(10.5, 3.5, 'krok=2\nNakładanie!\ne3,e4 → w oknie 1 i 2', fontsize=FS, ha='center', style='italic', bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)) # --- Session --- ax = axes[2] ax.set_xlim(0, 14) ax.set_ylim(0, 4) ax.set_aspect('auto') ax.axis('off') ax.set_title('Session Window (okno sesji) — dynamiczny rozmiar, gap = przerwa', fontsize=FS_LABEL, fontweight='bold') ax.annotate('', xy=(13.5, 1.0), xytext=(0.3, 1.0), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) ax.text(13.5, 0.6, 'czas', fontsize=FS_SMALL, ha='center') # Cluster 1: events close together cluster1 = [1.0, 1.8, 2.3, 3.0] for x in cluster1: ax.plot(x, 1.0, 'ko', markersize=5) # Gap ax.annotate('', xy=(7.0, 0.7), xytext=(4.0, 0.7), arrowprops=dict(arrowstyle='<->', lw=1, color=LN)) ax.text(5.5, 0.3, 'GAP > timeout', fontsize=FS, ha='center', fontweight='bold', style='italic') # Cluster 2 cluster2 = [8.0, 8.8, 9.5] for x in cluster2: ax.plot(x, 1.0, 'ko', markersize=5) # Session boxes rect1 = mpatches.FancyBboxPatch((0.7, 1.4), 2.6, 1.0, boxstyle="round,pad=0.1", facecolor=GRAY1, edgecolor=LN, lw=1.5) ax.add_patch(rect1) ax.text(2.0, 1.9, 'Sesja 1\n(4 zdarzenia)', fontsize=FS, ha='center', fontweight='bold') rect2 = mpatches.FancyBboxPatch((7.7, 1.4), 2.1, 1.0, boxstyle="round,pad=0.1", facecolor=GRAY3, edgecolor=LN, lw=1.5) ax.add_patch(rect2) ax.text(8.75, 1.9, 'Sesja 2\n(3 zdarzenia)', fontsize=FS, ha='center', fontweight='bold') ax.text(5.5, 3.0, 'Nowa sesja po przerwie > gap', fontsize=FS, ha='center', style='italic') # --- Global --- ax = axes[3] ax.set_xlim(0, 14) ax.set_ylim(0, 4) ax.set_aspect('auto') ax.axis('off') ax.set_title('Global Window — jedno okno na cały strumień + trigger', fontsize=FS_LABEL, fontweight='bold') ax.annotate('', xy=(13.5, 1.0), xytext=(0.3, 1.0), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) ax.text(13.5, 0.6, 'czas', fontsize=FS_SMALL, ha='center') for i in range(12): x = 1.0 + i * 1.0 ax.plot(x, 1.0, 'ko', markersize=5) # One big window rect = mpatches.FancyBboxPatch((0.5, 1.4), 12.5, 1.0, boxstyle="round,pad=0.1", facecolor=GRAY1, edgecolor=LN, lw=2) ax.add_patch(rect) ax.text(6.75, 1.9, 'GLOBAL WINDOW (cały strumień)', fontsize=FS, ha='center', fontweight='bold') # Trigger markers for tx in [4.0, 8.0, 12.0]: ax.plot([tx, tx], [1.4, 2.4], color=LN, lw=2, linestyle='--') ax.text(tx, 2.7, 'EMIT', fontsize=FS_SMALL, ha='center', fontweight='bold', bbox=dict(boxstyle='round,pad=0.1', facecolor=GRAY3, edgecolor=LN)) ax.text(6.75, 3.3, 'Trigger decyduje kiedy emitować (np. co N zdarzeń)', fontsize=FS, ha='center', style='italic') fig.tight_layout(rect=[0, 0, 1, 0.94]) save_fig(fig, 'q20_window_types.png') # ============================================================ # 3. Event Time vs Processing Time scatter + watermark # ============================================================ def gen_event_vs_processing_time(): fig, axes = plt.subplots(1, 2, figsize=(11, 5)) fig.suptitle('Event Time vs Processing Time + Watermark', fontsize=FS_TITLE, fontweight='bold') # --- Panel 1: Ideal vs Real --- ax = axes[0] ax.set_xlim(0, 10) ax.set_ylim(0, 10) ax.set_aspect('equal') ax.set_xlabel('Event Time', fontsize=FS_LABEL) ax.set_ylabel('Processing Time', fontsize=FS_LABEL) ax.set_title('Idealny vs Realny świat', fontsize=FS_LABEL, fontweight='bold') ax.set_xticks([]) ax.set_yticks([]) # Ideal line ax.plot([0, 9], [0, 9], 'k--', lw=1.5, label='ideał (brak opóźnień)') # Real scattered points (processing >= event, some out of order) np.random.seed(42) event_times = np.sort(np.random.uniform(1, 8, 15)) proc_times = event_times + np.random.exponential(0.5, 15) # Make some out of order idx = [3, 7, 11] for i in idx: proc_times[i] += 1.5 ax.scatter(event_times, proc_times, c='black', s=30, zorder=5, label='zdarzenia (realne)') # Highlight out-of-order for i in idx: ax.annotate('out-of-order', xy=(event_times[i], proc_times[i]), xytext=(event_times[i] + 0.8, proc_times[i] + 0.5), fontsize=FS_SMALL, ha='left', arrowprops=dict(arrowstyle='->', lw=0.8, color='#555')) ax.legend(fontsize=FS_SMALL, loc='upper left') ax.text(7, 2, 'Opóźnienie\nsieciowe ↑', fontsize=FS, ha='center', style='italic', color='#555') # --- Panel 2: Watermark concept --- ax = axes[1] ax.set_xlim(0, 10) ax.set_ylim(0, 10) ax.set_aspect('equal') ax.set_xlabel('Event Time', fontsize=FS_LABEL) ax.set_ylabel('Processing Time', fontsize=FS_LABEL) ax.set_title('Watermark — granica postępu', fontsize=FS_LABEL, fontweight='bold') ax.set_xticks([]) ax.set_yticks([]) # Events ax.scatter(event_times, proc_times, c='black', s=30, zorder=5) # Watermark line (below most points, tracks progress) wm_x = np.linspace(0, 9, 50) wm_y = wm_x + 0.3 # watermark slightly above ideal ax.plot(wm_x, wm_y, 'k-', lw=2.5, label='Watermark') ax.fill_between(wm_x, 0, wm_y, alpha=0.15, color='gray') ax.text(2.0, 1.0, 'PONIŻEJ watermark:\n„na pewno dotarło"', fontsize=FS, ha='center', fontweight='bold', bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)) # Late event late_x, late_y = event_times[7], proc_times[7] ax.scatter([late_x], [late_y], c='white', s=80, zorder=6, edgecolors='black', linewidths=2) ax.annotate('LATE DATA!\n(po watermarku)', xy=(late_x, late_y), xytext=(late_x + 1.2, late_y + 0.8), fontsize=FS_SMALL, ha='left', fontweight='bold', arrowprops=dict(arrowstyle='->', lw=1, color=LN)) ax.legend(fontsize=FS_SMALL, loc='upper left') fig.tight_layout(rect=[0, 0, 1, 0.92]) save_fig(fig, 'q20_event_vs_processing_time.png') # ============================================================ # 4. Tumbling window example — fraud detection # ============================================================ def gen_tumbling_fraud(): fig, ax = plt.subplots(figsize=(9, 4)) ax.set_xlim(0, 12) ax.set_ylim(0, 5.5) ax.set_aspect('auto') ax.axis('off') ax.set_title('Tumbling Window — fraud detection (okno = 1 min)', fontsize=FS_TITLE, fontweight='bold', pad=10) # Time axis ax.annotate('', xy=(11.5, 1.0), xytext=(0.5, 1.0), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) ax.text(6.0, 0.4, 'czas', fontsize=FS, ha='center') # Window 1: normal draw_box(ax, 1.0, 1.5, 4.5, 3.0, '', fill=GRAY4, rounded=True, lw=2) ax.text(3.25, 4.2, '[14:00 — 14:01]', fontsize=FS_LABEL, ha='center', fontweight='bold') # Transactions txns1 = ['Sklep A: 50 zł', 'Sklep B: 30 zł', 'Stacja: 80 zł'] for i, t in enumerate(txns1): draw_box(ax, 1.3, 3.3 - i * 0.55, 4.0, 0.45, t, fill=GRAY1, fontsize=FS_SMALL, rounded=False) ax.text(3.25, 1.7, 'count = 3 → OK', fontsize=FS, ha='center', fontweight='bold', color='#2E7D32', bbox=dict(boxstyle='round,pad=0.15', facecolor='#E8F5E9', edgecolor='#2E7D32')) # Window 2: fraud! draw_box(ax, 6.0, 1.5, 4.5, 3.0, '', fill=GRAY1, rounded=True, lw=2) ax.text(8.25, 4.2, '[14:01 — 14:02]', fontsize=FS_LABEL, ha='center', fontweight='bold') txns2 = ['ATM Warszawa: 500 zł', 'ATM Kraków: 500 zł', '... +45 transakcji'] for i, t in enumerate(txns2): draw_box(ax, 6.3, 3.3 - i * 0.55, 4.0, 0.45, t, fill=GRAY3, fontsize=FS_SMALL, rounded=False) ax.text(8.25, 1.7, 'count = 47 → ALERT!', fontsize=FS, ha='center', fontweight='bold', color='#C62828', bbox=dict(boxstyle='round,pad=0.15', facecolor='#F8D7DA', edgecolor='#C62828')) save_fig(fig, 'q20_tumbling_fraud.png') # ============================================================ # 5. Sliding window — SLA monitoring # ============================================================ def gen_sliding_sla(): fig, ax = plt.subplots(figsize=(9, 4.5)) ax.set_xlim(0, 12) ax.set_ylim(0, 6) ax.set_aspect('auto') ax.axis('off') ax.set_title('Sliding Window — monitoring SLA (okno=5min, krok=1min)', fontsize=FS_TITLE, fontweight='bold', pad=10) # Time axis ax.annotate('', xy=(11.5, 0.5), xytext=(0.5, 0.5), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) times = ['14:05', '14:06', '14:07', '14:08', '14:09'] latencies = [120, 180, 340, 290, 150] sla = 200 for i, (t, lat) in enumerate(zip(times, latencies)): x = 1.5 + i * 2.0 ax.text(x, 0.1, t, fontsize=FS, ha='center') # Bar proportional to latency bar_h = lat / 100.0 is_breach = lat > sla fill = '#F8D7DA' if is_breach else GRAY1 edge = '#C62828' if is_breach else LN draw_box(ax, x - 0.5, 1.0, 1.0, bar_h, '', fill=fill, rounded=False, edgecolor=edge, lw=1.5) ax.text(x, 1.0 + bar_h + 0.15, f'{lat}ms', fontsize=FS, ha='center', fontweight='bold', color='#C62828' if is_breach else LN) # Status status = 'ALERT!' if is_breach else 'OK' ax.text(x, 1.0 + bar_h + 0.55, status, fontsize=FS_SMALL, ha='center', fontweight='bold', color='#C62828' if is_breach else '#2E7D32') # SLA line sla_y = 1.0 + sla / 100.0 ax.plot([0.8, 11.2], [sla_y, sla_y], 'k--', lw=1.5) ax.text(11.3, sla_y, f'SLA={sla}ms', fontsize=FS, va='center', fontweight='bold') # Sliding window bracket ax.annotate('', xy=(1.0, 5.3), xytext=(5.0, 5.3), arrowprops=dict(arrowstyle='<->', lw=1.5, color=LN)) ax.text(3.0, 5.6, 'okno = 5 min', fontsize=FS, ha='center', fontweight='bold') ax.annotate('', xy=(3.0, 4.8), xytext=(5.0, 4.8), arrowprops=dict(arrowstyle='<->', lw=1, color='#555')) ax.text(4.0, 4.4, 'krok = 1 min\n(nakładanie!)', fontsize=FS_SMALL, ha='center', style='italic') save_fig(fig, 'q20_sliding_sla.png') # ============================================================ # 6. Session window — user sessions # ============================================================ def gen_session_users(): fig, axes = plt.subplots(2, 1, figsize=(10, 5)) fig.suptitle('Session Window — sesje użytkowników (gap = 30 min)', fontsize=FS_TITLE, fontweight='bold') # Anna: 2 sessions ax = axes[0] ax.set_xlim(0, 14) ax.set_ylim(0, 3.5) ax.set_aspect('auto') ax.axis('off') ax.set_title('Użytkownik Anna', fontsize=FS_LABEL, fontweight='bold') ax.annotate('', xy=(13.5, 1.0), xytext=(0.3, 1.0), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) # Clicks cluster 1 for x in [1.0, 1.8, 2.5, 3.2]: ax.plot(x, 1.0, 'ko', markersize=6) # Clicks cluster 2 for x in [9.0, 9.8, 10.5]: ax.plot(x, 1.0, 'ko', markersize=6) # Sessions rect1 = mpatches.FancyBboxPatch((0.7, 1.5), 2.8, 1.2, boxstyle="round,pad=0.1", facecolor=GRAY1, edgecolor=LN, lw=1.5) ax.add_patch(rect1) ax.text(2.1, 2.1, 'Sesja 1\n4 kliknięcia, 12 min', fontsize=FS, ha='center', fontweight='bold') rect2 = mpatches.FancyBboxPatch((8.7, 1.5), 2.1, 1.2, boxstyle="round,pad=0.1", facecolor=GRAY3, edgecolor=LN, lw=1.5) ax.add_patch(rect2) ax.text(9.75, 2.1, 'Sesja 2\n3 kliknięcia, 8 min', fontsize=FS, ha='center', fontweight='bold') # Gap ax.annotate('', xy=(8.5, 0.5), xytext=(3.8, 0.5), arrowprops=dict(arrowstyle='<->', lw=1.5, color=LN)) ax.text(6.15, 0.1, 'cisza 45 min > gap(30)', fontsize=FS, ha='center', fontweight='bold', style='italic') # Bob: 1 session ax = axes[1] ax.set_xlim(0, 14) ax.set_ylim(0, 3.5) ax.set_aspect('auto') ax.axis('off') ax.set_title('Użytkownik Bob', fontsize=FS_LABEL, fontweight='bold') ax.annotate('', xy=(13.5, 1.0), xytext=(0.3, 1.0), arrowprops=dict(arrowstyle='->', lw=1.5, color=LN)) # Clicks spread evenly bobs = [1.0, 2.5, 4.0, 5.5, 7.0, 8.5, 10.0] for x in bobs: ax.plot(x, 1.0, 'ko', markersize=6) rect = mpatches.FancyBboxPatch((0.7, 1.5), 9.6, 1.2, boxstyle="round,pad=0.1", facecolor=GRAY1, edgecolor=LN, lw=2) ax.add_patch(rect) ax.text(5.5, 2.1, 'Sesja 1 (ciągła) — 7 kliknięć, każde < 30 min od poprzedniego', fontsize=FS, ha='center', fontweight='bold') fig.tight_layout(rect=[0, 0, 1, 0.92]) save_fig(fig, 'q20_session_users.png') # ============================================================ # 7. Streaming ecosystem overview # ============================================================ def gen_streaming_ecosystem(): fig, ax = plt.subplots(figsize=(10, 5.5)) ax.set_xlim(0, 12) ax.set_ylim(0, 7) ax.set_aspect('auto') ax.axis('off') ax.set_title('Ekosystem przetwarzania strumieniowego', fontsize=FS_TITLE, fontweight='bold', pad=10) # Source draw_box(ax, 0.3, 2.5, 2.0, 3.0, 'Kafka\nTopics\n(źródło)', fill=GRAY2, fontsize=FS, fontweight='bold') # Engines engines = [ ('Kafka Streams\n(library w JVM)', GRAY1, 4.7), ('Apache Flink\n(klaster)', GRAY3, 3.2), ('Spark Streaming\n(klaster)', GRAY5, 1.7), ] for label, color, y in engines: draw_box(ax, 4.0, y, 3.0, 1.2, label, fill=color, fontsize=FS, fontweight='bold') draw_arrow(ax, 2.3, 4.0, 4.0, y + 0.6, lw=1.5) # Sinks sinks = [ ('Kafka topic\n/ baza danych', GRAY4, 4.7), ('DB / Kafka\n/ S3', GRAY4, 3.2), ('HDFS / DB\n/ dashboard', GRAY4, 1.7), ] for label, color, y in sinks: draw_box(ax, 8.5, y, 2.5, 1.2, label, fill=color, fontsize=FS) draw_arrow(ax, 7.0, y + 0.6, 8.5, y + 0.6, lw=1.5) # Labels ax.text(1.3, 6.0, 'ŹRÓDŁO', fontsize=FS_LABEL, ha='center', fontweight='bold') ax.text(5.5, 6.2, 'SILNIK', fontsize=FS_LABEL, ha='center', fontweight='bold') ax.text(9.75, 6.2, 'WYNIK', fontsize=FS_LABEL, ha='center', fontweight='bold') # Latency annotations ax.text(5.5, 5.95, '~1-10 ms', fontsize=FS_SMALL, ha='center', style='italic') ax.text(5.5, 4.5, '<10 ms', fontsize=FS_SMALL, ha='center', style='italic') ax.text(5.5, 3.0, '~100 ms', fontsize=FS_SMALL, ha='center', style='italic') save_fig(fig, 'q20_streaming_ecosystem.png') # ============================================================ # 8. True streaming vs Micro-batch # ============================================================ def gen_true_vs_microbatch(): fig, axes = plt.subplots(2, 1, figsize=(10, 5.5)) fig.suptitle('True Streaming vs Micro-Batch', fontsize=FS_TITLE, fontweight='bold') # True streaming ax = axes[0] ax.set_xlim(0, 12) ax.set_ylim(0, 3.5) ax.set_aspect('auto') ax.axis('off') ax.set_title('TRUE STREAMING (Flink, Kafka Streams) — event-by-event', fontsize=FS_LABEL, fontweight='bold') for i in range(6): x = 1.0 + i * 1.8 # Event draw_box(ax, x, 2.0, 0.8, 0.7, f'e{i+1}', fill=GRAY1, fontsize=FS, fontweight='bold', rounded=False) # Arrow down draw_arrow(ax, x + 0.4, 2.0, x + 0.4, 1.4, lw=1) # Result draw_box(ax, x, 0.5, 0.8, 0.7, f'r{i+1}', fill=GRAY3, fontsize=FS, fontweight='bold', rounded=False) # Latency label ax.text(x + 0.4, 1.6, '~ms', fontsize=5, ha='center', color='#555') ax.text(11.5, 1.3, 'Latencja:\n< 10 ms', fontsize=FS, ha='center', fontweight='bold', bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)) # Micro-batch ax = axes[1] ax.set_xlim(0, 12) ax.set_ylim(0, 3.5) ax.set_aspect('auto') ax.axis('off') ax.set_title('MICRO-BATCH (Spark Streaming) — grupami co ~100ms', fontsize=FS_LABEL, fontweight='bold') batch_colors = [GRAY1, GRAY2, GRAY3] for b in range(3): bx = 0.8 + b * 3.5 # Batch boundary draw_box(ax, bx, 1.8, 3.0, 1.0, '', fill=batch_colors[b], rounded=True, lw=1.5) ax.text(bx + 1.5, 2.6, f'Batch {b+1}', fontsize=FS, ha='center', fontweight='bold') for j in range(3): ex = bx + 0.3 + j * 0.9 draw_box(ax, ex, 2.0, 0.7, 0.5, f'e{b*3+j+1}', fill='white', fontsize=FS_SMALL, rounded=False) # Arrow down draw_arrow(ax, bx + 1.5, 1.8, bx + 1.5, 1.2, lw=1.5) # Result draw_box(ax, bx + 0.5, 0.4, 2.0, 0.7, f'result {b+1}', fill=GRAY4, fontsize=FS, fontweight='bold') ax.text(11.5, 1.3, 'Latencja:\n~100ms–s', fontsize=FS, ha='center', fontweight='bold', bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)) fig.tight_layout(rect=[0, 0, 1, 0.92]) save_fig(fig, 'q20_true_vs_microbatch.png') # ============================================================ # 9. Platform comparison table # ============================================================ def gen_platform_comparison(): fig, ax = plt.subplots(figsize=(9, 5)) ax.set_xlim(0, 11.5) ax.set_ylim(-6, 1) ax.set_aspect('auto') ax.axis('off') ax.set_title('Porównanie platform strumieniowych', fontsize=FS_TITLE, fontweight='bold', pad=10) headers = ['Cecha', 'Kafka Streams', 'Apache Flink', 'Spark Streaming'] col_w = [2.5, 2.8, 2.8, 2.8] rows = [ ['Model', 'event-by-event', 'event-by-event', 'micro-batch (~100ms)'], ['Deployment', 'library (w JVM)', 'klaster', 'klaster'], ['Latencja', '~1–10 ms', '< 10 ms', '100 ms – sekundy'], ['Exactly-once', 'Kafka TXN', 'checkpointing', 'WAL'], ['State', 'RocksDB local', 'RocksDB + ckpt', 'in-memory / ext'], ['Okna', 'T, S, Session', 'wszystkie + custom', 'T, S'], ['Use case', 'Kafka → Kafka', 'złożona analityka', 'ETL + ML / SQL'], ] draw_table(ax, headers, rows, x0=0.25, y0=0.5, col_widths=col_w, row_h=0.6, fontsize=7, header_fontsize=8) save_fig(fig, 'q20_platform_comparison.png') # ============================================================ # 10. Kafka Streams architecture # ============================================================ def gen_kafka_streams_arch(): fig, ax = plt.subplots(figsize=(9, 5)) ax.set_xlim(0, 12) ax.set_ylim(0, 7) ax.set_aspect('auto') ax.axis('off') ax.set_title('Kafka Streams — architektura (library w JVM)', fontsize=FS_TITLE, fontweight='bold', pad=10) # Outer box: Your Java application draw_box(ax, 0.5, 0.5, 11.0, 5.5, '', fill=GRAY4, rounded=True, lw=2.5) ax.text(6.0, 5.7, 'Twoja aplikacja Java (JVM)', fontsize=FS_LABEL, ha='center', fontweight='bold') # Kafka Consumer draw_box(ax, 1.0, 3.0, 2.5, 1.5, 'Kafka\nConsumer\n(input topic)', fill=GRAY1, fontsize=FS, fontweight='bold') # Processing draw_box(ax, 4.5, 3.0, 2.5, 1.5, 'Kafka Streams\n(logika\nbiznesowa)', fill=GRAY2, fontsize=FS, fontweight='bold') # Kafka Producer draw_box(ax, 8.0, 3.0, 2.5, 1.5, 'Kafka\nProducer\n(output topic)', fill=GRAY1, fontsize=FS, fontweight='bold') # Arrows draw_arrow(ax, 3.5, 3.75, 4.5, 3.75, lw=2) draw_arrow(ax, 7.0, 3.75, 8.0, 3.75, lw=2) # RocksDB state store draw_box(ax, 4.5, 1.0, 2.5, 1.3, 'RocksDB\n(stan lokalny)', fill=GRAY3, fontsize=FS, fontweight='bold') ax.plot([5.75, 5.75], [3.0, 2.3], color=LN, lw=1.5) ax.text(7.3, 1.6, 'okna, joiny,\nagregacje', fontsize=FS_SMALL, style='italic', va='center') # Key message ax.text(6.0, 0.2, 'NIE potrzebujesz osobnego klastra! ' 'Skalujesz = więcej instancji JVM.', fontsize=FS, ha='center', fontweight='bold', bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor=LN)) save_fig(fig, 'q20_kafka_streams_arch.png') # ============================================================ # 11. Flink architecture + checkpointing # ============================================================ def gen_flink_arch(): fig, ax = plt.subplots(figsize=(9, 6)) ax.set_xlim(0, 12) ax.set_ylim(0, 8) ax.set_aspect('auto') ax.axis('off') ax.set_title('Apache Flink — architektura klastra + checkpointing', fontsize=FS_TITLE, fontweight='bold', pad=10) # Cluster border draw_box(ax, 0.3, 1.0, 11.4, 6.2, '', fill=GRAY4, rounded=True, lw=2.5) ax.text(6.0, 6.95, 'FLINK CLUSTER', fontsize=FS_LABEL, ha='center', fontweight='bold') # Job Manager draw_box(ax, 1.0, 5.5, 3.0, 1.2, 'Job Manager\n(koordynacja,\ncheckpointy)', fill=GRAY2, fontsize=FS, fontweight='bold') # Task Managers draw_box(ax, 1.0, 3.0, 10.0, 2.0, '', fill='white', rounded=True, lw=1.5) ax.text(6.0, 4.7, 'Task Managers (workery)', fontsize=FS, ha='center', fontweight='bold') slots = ['source\n& map()', 'map()', 'window()\n& reduce', 'sink()'] for i, s in enumerate(slots): x = 1.5 + i * 2.4 draw_box(ax, x, 3.3, 2.0, 1.2, f'Slot {i+1}\n{s}', fill=GRAY1, fontsize=FS_SMALL, fontweight='bold') draw_arrow(ax, 2.5, 5.5, 6.0, 5.0, lw=1.5, style='->') ax.text(5.0, 5.5, 'przydziela\npodzadania', fontsize=FS_SMALL, style='italic') # Checkpoint storage draw_box(ax, 5.5, 1.2, 3.5, 1.2, 'Checkpoint Storage\n(HDFS / S3)', fill=GRAY3, fontsize=FS, fontweight='bold') ax.plot([7.25, 7.25], [2.4, 3.3], color=LN, lw=1.5, linestyle='--') ax.text(8.0, 2.7, 'snapshoty\nstanu', fontsize=FS_SMALL, style='italic') # Barrier concept at bottom ax.text(3.0, 1.6, 'Barrier:', fontsize=FS, fontweight='bold') barrier_boxes = ['source', '|B|', 'map', '|B|', 'sink'] bx = 0.8 for i, b in enumerate(barrier_boxes): if b == '|B|': ax.text(bx + 0.3, 1.5, b, fontsize=FS, ha='center', fontweight='bold', bbox=dict(boxstyle='round,pad=0.1', facecolor=GRAY5, edgecolor=LN)) draw_arrow(ax, bx, 1.5, bx + 0.1, 1.5, lw=1) bx += 0.7 else: draw_box(ax, bx, 1.3, 1.0, 0.45, b, fill=GRAY1, fontsize=FS_SMALL, fontweight='bold') bx += 1.2 save_fig(fig, 'q20_flink_arch.png') # ============================================================ # 12. Spark Streaming architecture # ============================================================ def gen_spark_streaming_arch(): fig, ax = plt.subplots(figsize=(9, 5)) ax.set_xlim(0, 12) ax.set_ylim(0, 7) ax.set_aspect('auto') ax.axis('off') ax.set_title('Spark Streaming — architektura (micro-batch)', fontsize=FS_TITLE, fontweight='bold', pad=10) # Cluster border draw_box(ax, 0.3, 0.5, 11.4, 5.8, '', fill=GRAY4, rounded=True, lw=2.5) ax.text(6.0, 6.0, 'SPARK CLUSTER', fontsize=FS_LABEL, ha='center', fontweight='bold') # Driver draw_box(ax, 1.0, 4.5, 3.0, 1.2, 'Driver\n(planuje mini-batche)', fill=GRAY2, fontsize=FS, fontweight='bold') draw_arrow(ax, 2.5, 4.5, 6.0, 4.0, lw=1.5) # Batches batches = ['batch 1\n(e1,e2,e3)', 'batch 2\n(e4,e5,e6)', 'batch 3\n(e7,e8,e9)'] for i, b in enumerate(batches): y = 2.8 - i * 1.0 draw_box(ax, 4.5, y, 2.5, 0.8, b, fill=GRAY1, fontsize=FS_SMALL, fontweight='bold') # map → reduce draw_arrow(ax, 7.0, y + 0.4, 7.5, y + 0.4, lw=1) draw_box(ax, 7.5, y, 1.3, 0.8, 'map→\nreduce', fill=GRAY3, fontsize=5.5) draw_arrow(ax, 8.8, y + 0.4, 9.3, y + 0.4, lw=1) draw_box(ax, 9.3, y, 1.5, 0.8, f'result {i+1}', fill='white', fontsize=FS_SMALL) # Spark ecosystem draw_box(ax, 1.0, 1.0, 3.0, 1.0, 'Spark SQL / MLlib\n(ten sam ekosystem!)', fill=GRAY5, fontsize=FS, fontweight='bold') ax.text(6.0, 0.3, 'ZALETA: batch API | WADA: latencja ≥ batch interval (~100ms)', fontsize=FS, ha='center', fontweight='bold', bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor=LN)) save_fig(fig, 'q20_spark_streaming_arch.png') # ============================================================ # 13. Lambda vs Kappa architecture # ============================================================ def gen_lambda_vs_kappa(): fig, axes = plt.subplots(2, 1, figsize=(10, 7)) fig.suptitle('Architektura Lambda vs Kappa', fontsize=FS_TITLE, fontweight='bold') # --- Lambda --- ax = axes[0] ax.set_xlim(0, 12) ax.set_ylim(0, 5) ax.set_aspect('auto') ax.axis('off') ax.set_title('LAMBDA — 2 ścieżki (batch + speed)', fontsize=FS_LABEL, fontweight='bold') # Source draw_box(ax, 0.3, 1.8, 2.0, 1.5, 'Źródło\ndanych', fill=GRAY2, fontsize=FS, fontweight='bold') # Batch layer (top) draw_box(ax, 3.5, 3.3, 3.0, 1.2, 'Batch Layer\n(Spark)\nprzelicza co godzinę', fill=GRAY1, fontsize=FS_SMALL, fontweight='bold') draw_arrow(ax, 2.3, 3.0, 3.5, 3.9, lw=1.5) # Speed layer (bottom) draw_box(ax, 3.5, 0.8, 3.0, 1.2, 'Speed Layer\n(Flink)\nreal-time', fill=GRAY3, fontsize=FS_SMALL, fontweight='bold') draw_arrow(ax, 2.3, 2.2, 3.5, 1.4, lw=1.5) # Results draw_box(ax, 7.5, 3.3, 2.0, 1.2, 'Dokładne\nwyniki\n(wolne)', fill=GRAY4, fontsize=FS_SMALL) draw_arrow(ax, 6.5, 3.9, 7.5, 3.9, lw=1.5) draw_box(ax, 7.5, 0.8, 2.0, 1.2, 'Przybliżone\nwyniki\n(szybkie)', fill=GRAY4, fontsize=FS_SMALL) draw_arrow(ax, 6.5, 1.4, 7.5, 1.4, lw=1.5) # Merge draw_box(ax, 10.0, 2.0, 1.5, 1.5, 'MERGE\n→ UI', fill=GRAY5, fontsize=FS, fontweight='bold') draw_arrow(ax, 9.5, 3.5, 10.0, 3.0, lw=1.5) draw_arrow(ax, 9.5, 1.8, 10.0, 2.5, lw=1.5) ax.text(6.0, 0.1, '2 systemy, 2 kody — złożone ale pewne', fontsize=FS, ha='center', style='italic', bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)) # --- Kappa --- ax = axes[1] ax.set_xlim(0, 12) ax.set_ylim(0, 4) ax.set_aspect('auto') ax.axis('off') ax.set_title('KAPPA — 1 ścieżka (streaming only)', fontsize=FS_LABEL, fontweight='bold') # Source draw_box(ax, 0.3, 1.3, 2.0, 1.5, 'Źródło\ndanych', fill=GRAY2, fontsize=FS, fontweight='bold') # Single streaming layer draw_box(ax, 3.5, 1.3, 3.5, 1.5, 'Streaming Layer\n(Flink)\n+ replay z Kafka log', fill=GRAY1, fontsize=FS, fontweight='bold') draw_arrow(ax, 2.3, 2.05, 3.5, 2.05, lw=2) # Output draw_box(ax, 8.0, 1.3, 2.5, 1.5, 'Wyniki\n→ UI', fill=GRAY4, fontsize=FS, fontweight='bold') draw_arrow(ax, 7.0, 2.05, 8.0, 2.05, lw=2) # Replay arrow ax.annotate('', xy=(3.5, 1.0), xytext=(7.0, 1.0), arrowprops=dict(arrowstyle='<-', lw=1.5, color=LN, connectionstyle='arc3,rad=0.3', linestyle='--')) ax.text(5.25, 0.3, 'Replay z Kafka\n(przetwórz historię od nowa)', fontsize=FS_SMALL, ha='center', style='italic') ax.text(6.0, 3.3, '1 system, 1 kod — prostsze, ale replay = dużo I/O', fontsize=FS, ha='center', style='italic', bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)) fig.tight_layout(rect=[0, 0, 1, 0.92]) save_fig(fig, 'q20_lambda_vs_kappa.png') # ============================================================ # 14. Lambda vs Kappa comparison table # ============================================================ def gen_lambda_kappa_table(): fig, ax = plt.subplots(figsize=(8, 3.5)) ax.set_xlim(0, 10) ax.set_ylim(-4.5, 1) ax.set_aspect('auto') ax.axis('off') ax.set_title('Lambda vs Kappa — porównanie', fontsize=FS_TITLE, fontweight='bold', pad=10) headers = ['Cecha', 'Lambda', 'Kappa'] col_w = [2.5, 3.5, 3.5] rows = [ ['Ścieżki', '2 (batch + speed)', '1 (streaming)'], ['Kod', '2 implementacje', '1 implementacja'], ['Złożoność', 'wysoka', 'niska'], ['Replay', 'batch przelicza', 'Kafka replay'], ['Spójność', 'merge wymagany', 'natywna'], ['Przykład', 'Netflix, LinkedIn', 'Uber, Confluent'], ] draw_table(ax, headers, rows, x0=0.25, y0=0.5, col_widths=col_w, row_h=0.55, fontsize=7.5) save_fig(fig, 'q20_lambda_kappa_table.png') # ============================================================ # 15. Exactly-once comparison # ============================================================ def gen_exactly_once(): fig, ax = plt.subplots(figsize=(9, 5)) ax.set_xlim(0, 12) ax.set_ylim(0, 7) ax.set_aspect('auto') ax.axis('off') ax.set_title('Exactly-Once — mechanizmy na 3 platformach', fontsize=FS_TITLE, fontweight='bold', pad=10) # Flink draw_box(ax, 0.3, 4.3, 11.0, 2.0, '', fill=GRAY4, rounded=True, lw=1.5) ax.text(1.0, 5.9, 'Flink — Distributed Snapshots (Chandy-Lamport)', fontsize=FS, fontweight='bold') flink_steps = ['source', '|B|', 'map()', '|B|', 'sink()'] bx = 1.0 for s in flink_steps: if s == '|B|': ax.text(bx + 0.25, 4.85, s, fontsize=FS, ha='center', fontweight='bold', bbox=dict(boxstyle='round,pad=0.1', facecolor=GRAY5, edgecolor=LN)) draw_arrow(ax, bx - 0.1, 4.85, bx + 0.05, 4.85, lw=1) bx += 0.7 else: draw_box(ax, bx, 4.6, 1.5, 0.55, s, fill=GRAY1, fontsize=FS_SMALL, fontweight='bold') bx += 1.8 ax.text(8.5, 5.0, 'barrier → save state\n→ checkpoint (HDFS/S3)', fontsize=FS_SMALL, style='italic') # Kafka Streams draw_box(ax, 0.3, 2.3, 11.0, 1.5, '', fill=GRAY1, rounded=True, lw=1.5) ax.text(1.0, 3.5, 'Kafka Streams — Transakcje Kafka', fontsize=FS, fontweight='bold') ax.text(1.5, 2.85, 'idempotent producer + begin TX → produce → commit TX → ' 'consumer offsets w TX', fontsize=FS_SMALL) # Spark draw_box(ax, 0.3, 0.5, 11.0, 1.5, '', fill=GRAY3, rounded=True, lw=1.5) ax.text(1.0, 1.7, 'Spark Streaming — Write-Ahead Log (WAL)', fontsize=FS, fontweight='bold') ax.text(1.5, 1.05, 'WAL + checkpointing micro-batchów + idempotent sinks ' '(np. upsert do DB)', fontsize=FS_SMALL) save_fig(fig, 'q20_exactly_once.png') # ============================================================ # 16. Late data strategies (DRAS) # ============================================================ def gen_late_data_strategies(): fig, ax = plt.subplots(figsize=(9, 5.5)) ax.set_xlim(0, 12) ax.set_ylim(0, 7) ax.set_aspect('auto') ax.axis('off') ax.set_title('Late Data — 4 strategie (mnemonik DRAS)', fontsize=FS_TITLE, fontweight='bold', pad=10) # Setup: window closed, late event arrives draw_box(ax, 0.5, 5.5, 4.5, 1.0, 'Okno [14:00–14:05]\nZAMKNIĘTE o 14:05', fill=GRAY2, fontsize=FS, fontweight='bold') draw_box(ax, 6.0, 5.5, 4.5, 1.0, 'Spóźnione zdarzenie\nevent_time=14:00:03\narrives=14:05:30', fill='#F8D7DA', fontsize=FS_SMALL, fontweight='bold') draw_arrow(ax, 10.5, 6.0, 5.0, 6.0, lw=2, color='#C62828', style='->') ax.text(7.5, 5.2, 'LATE!', fontsize=FS_LABEL, ha='center', fontweight='bold', color='#C62828') # 4 strategies strategies = [ ('D — Drop', 'Odrzuć spóźnione', '/dev/null', GRAY4), ('R — Recompute', 'Przelicz okno ponownie', 'poprawne ale kosztowne', GRAY1), ('A — Allowed lateness', 'Czekaj dodatkowy czas\n(np. +2 min)', 'kompromis pamięci', GRAY2), ('S — Side output', 'Przekieruj do osobnej\nkolejki', 'elastyczne, ręczna analiza', GRAY3), ] for i, (name, desc, tradeoff, color) in enumerate(strategies): y = 3.8 - i * 1.1 draw_box(ax, 0.5, y, 2.5, 0.9, name, fill=color, fontsize=FS, fontweight='bold') ax.text(3.3, y + 0.45, desc, fontsize=FS_SMALL, va='center') ax.text(8.5, y + 0.45, tradeoff, fontsize=FS_SMALL, va='center', style='italic', color='#555') save_fig(fig, 'q20_late_data_strategies.png') # ============================================================ # 17. Decision tree — which platform # ============================================================ def gen_decision_tree(): fig, ax = plt.subplots(figsize=(10, 5.5)) ax.set_xlim(0, 12) ax.set_ylim(0, 7) ax.set_aspect('auto') ax.axis('off') ax.set_title('Drzewo decyzyjne — wybór platformy', fontsize=FS_TITLE, fontweight='bold', pad=10) # Root question draw_box(ax, 3.5, 5.5, 4.5, 1.0, 'Latencja < 10ms\nwymagana?', fill=GRAY2, fontsize=FS, fontweight='bold') # TAK branch draw_arrow(ax, 3.5, 5.7, 2.0, 5.0, lw=1.5) ax.text(2.3, 5.3, 'TAK', fontsize=FS, fontweight='bold') draw_box(ax, 0.3, 3.5, 3.5, 1.0, 'Dane już w Kafce?\nProste transformacje?', fill=GRAY1, fontsize=FS, fontweight='bold') # TAK → Kafka Streams draw_arrow(ax, 0.3, 3.7, -0.1, 3.0, lw=1.5) ax.text(0.0, 3.3, 'TAK', fontsize=FS_SMALL, fontweight='bold') draw_box(ax, -0.3, 1.8, 2.5, 1.0, 'Kafka\nStreams', fill=GRAY5, fontsize=FS_LABEL, fontweight='bold') # NIE → Flink draw_arrow(ax, 3.8, 3.7, 4.5, 3.0, lw=1.5) ax.text(4.0, 3.3, 'NIE\n(złożona logika)', fontsize=FS_SMALL) draw_box(ax, 3.0, 1.8, 2.5, 1.0, 'Apache\nFlink', fill=GRAY5, fontsize=FS_LABEL, fontweight='bold') # NIE branch draw_arrow(ax, 8.0, 5.7, 9.5, 5.0, lw=1.5) ax.text(8.7, 5.3, 'NIE', fontsize=FS, fontweight='bold') draw_box(ax, 7.5, 3.5, 4.2, 1.0, '~100ms–1s OK?\nPotrzeba ML / SQL?', fill=GRAY1, fontsize=FS, fontweight='bold') # TAK + ML → Spark draw_arrow(ax, 9.5, 3.5, 9.5, 3.0, lw=1.5) ax.text(10.0, 3.3, 'TAK + ML/SQL', fontsize=FS_SMALL) draw_box(ax, 8.0, 1.8, 2.5, 1.0, 'Spark\nStreaming', fill=GRAY5, fontsize=FS_LABEL, fontweight='bold') # TAK + proste → Kafka Streams too draw_arrow(ax, 7.5, 3.7, 6.5, 3.0, lw=1.5) ax.text(6.3, 3.3, 'proste + TAK', fontsize=FS_SMALL) draw_box(ax, 5.8, 1.8, 2.0, 1.0, 'Kafka\nStreams', fill=GRAY5, fontsize=FS, fontweight='bold') # Legend ax.text(6.0, 0.7, 'Reguła: Kafka Streams = najprostsze (library) | ' 'Flink = najpotężniejszy (true streaming) | Spark = ekosystem ML', fontsize=FS, ha='center', bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)) save_fig(fig, 'q20_decision_tree.png') # ============================================================ # MAIN # ============================================================ if __name__ == '__main__': print("Generating ALL PYTANIE 20 diagrams...") gen_batch_vs_streaming() gen_window_types() gen_event_vs_processing_time() gen_tumbling_fraud() gen_sliding_sla() gen_session_users() gen_streaming_ecosystem() gen_true_vs_microbatch() gen_platform_comparison() gen_kafka_streams_arch() gen_flink_arch() gen_spark_streaming_arch() gen_lambda_vs_kappa() gen_lambda_kappa_table() gen_exactly_once() gen_late_data_strategies() gen_decision_tree() print("\nAll 17 PYTANIE 20 diagrams generated successfully!")