mirror of
https://github.com/kuhyx/praca_magisterska.git
synced 2026-07-04 13:43:05 +02:00
1173 lines
44 KiB
Python
1173 lines
44 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
Generate ALL diagrams for PYTANIE 20: Analityka danych strumieniowych.
|
|||
|
|
Monochrome, A4-printable PNGs (300 DPI).
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import matplotlib
|
|||
|
|
matplotlib.use('Agg')
|
|||
|
|
import matplotlib.pyplot as plt
|
|||
|
|
import matplotlib.patches as mpatches
|
|||
|
|
from matplotlib.patches import FancyBboxPatch
|
|||
|
|
import numpy as np
|
|||
|
|
import os
|
|||
|
|
|
|||
|
|
# --- Shared rendering settings ------------------------------------------
DPI = 300          # resolution used when a figure is written to disk
BG = 'white'       # figure background colour
LN = 'black'       # default colour for outlines and arrows

# Font sizes.
FS = 8
FS_TITLE = 11
FS_SMALL = 6.5
FS_LABEL = 9

# Images are written to an "img" directory located beside this script.
# The directory is created as soon as the module is loaded.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
OUTPUT_DIR = os.path.join(_SCRIPT_DIR, 'img')
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Grayscale fills used by the diagrams -------------------------------
GRAY1 = '#E8E8E8'
GRAY2 = '#D0D0D0'
GRAY3 = '#B8B8B8'
GRAY4 = '#F5F5F5'
GRAY5 = '#C0C0C0'
|
|||
|
|
|
|||
|
|
|
|||
|
|
def draw_box(ax, x, y, w, h, text, fill='white', lw=1.2, fontsize=FS,
             fontweight='normal', ha='center', va='center', rounded=True,
             edgecolor=LN, linestyle='-'):
    """Add a filled box to *ax* and write *text* at its centre point.

    The box has its lower-left corner at (x, y) and size w x h.  With
    ``rounded=True`` a ``FancyBboxPatch`` with a small rounded pad is used,
    otherwise a plain ``Rectangle``.  The label is always anchored at the
    centre of the box; *ha* / *va* only control how the text is aligned
    relative to that anchor.
    """
    outline = dict(lw=lw, edgecolor=edgecolor, facecolor=fill,
                   linestyle=linestyle)
    if not rounded:
        patch = mpatches.Rectangle((x, y), w, h, **outline)
    else:
        patch = FancyBboxPatch((x, y), w, h, boxstyle="round,pad=0.05",
                               **outline)
    ax.add_patch(patch)

    centre_x = x + w/2
    centre_y = y + h/2
    ax.text(centre_x, centre_y, text, ha=ha, va=va, fontsize=fontsize,
            fontweight=fontweight, wrap=True)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def draw_arrow(ax, x1, y1, x2, y2, lw=1.2, style='->', color=LN):
    """Draw an arrow on *ax* running from (x1, y1) to (x2, y2).

    The arrow is an empty-text annotation, so *style* accepts any
    matplotlib ``arrowstyle`` string (for example ``'->'`` or ``'<->'``).
    """
    tail = (x1, y1)
    head = (x2, y2)
    arrow_props = {'arrowstyle': style, 'color': color, 'lw': lw}
    ax.annotate("", xy=head, xytext=tail, arrowprops=arrow_props)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def save_fig(fig, name):
    """Write *fig* into OUTPUT_DIR under file name *name*, then close it.

    The figure is saved with a tight bounding box at the module-wide DPI
    and background colour; the destination path is printed afterwards.
    """
    destination = os.path.join(OUTPUT_DIR, name)
    save_options = {
        'dpi': DPI,
        'bbox_inches': 'tight',
        'facecolor': BG,
        'pad_inches': 0.15,
    }
    fig.savefig(destination, **save_options)
    # Close explicitly so the figure is released once it has been saved.
    plt.close(fig)
    print(f" Saved: {destination}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def draw_table(ax, headers, rows, x0, y0, col_widths, row_h=0.4,
               header_fill=GRAY2, row_fills=None, fontsize=FS, header_fontsize=None):
    """Draw a simple grid table on *ax* out of square-cornered boxes.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    headers : sequence of header cell texts.
    rows : sequence of rows, each a sequence of cell texts.
    x0, y0 : lower-left corner of the header row; data rows are stacked
        below it, one ``row_h`` per row.
    col_widths : width of every column, indexed like the cells.
    row_h : height shared by the header and every data row.
    header_fill : fill colour of the header cells.
    row_fills : optional per-row fill colours.  Rows without an entry
        alternate between GRAY4 (even index) and white (odd index).
    fontsize : font size of data cells.
    header_fontsize : font size of header cells; defaults to *fontsize*.

    Raises
    ------
    IndexError
        If the header or a row has more cells than *col_widths* has entries.
    """
    if header_fontsize is None:
        header_fontsize = fontsize

    # Left edge of every column.  These depend only on x0 and col_widths, so
    # they are computed once instead of being re-accumulated for each row.
    lefts = [x0]
    for width in col_widths:
        lefts.append(lefts[-1] + width)

    # Header
    for j, hdr in enumerate(headers):
        draw_box(ax, lefts[j], y0, col_widths[j], row_h, hdr, fill=header_fill,
                 fontsize=header_fontsize, fontweight='bold', rounded=False)

    # Rows
    for i, row in enumerate(rows):
        cy = y0 - (i + 1) * row_h
        fill = GRAY4 if (i % 2 == 0) else 'white'
        if row_fills and i < len(row_fills):
            fill = row_fills[i]
        for j, cell in enumerate(row):
            # The first column acts as the row label and is set in bold.
            fw = 'bold' if j == 0 else 'normal'
            draw_box(ax, lefts[j], cy, col_widths[j], row_h, cell, fill=fill,
                     fontsize=fontsize, fontweight=fw, rounded=False)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 1. Batch vs Streaming concept
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_batch_vs_streaming():
    """Render the two-panel "batch vs streaming" concept diagram.

    The upper panel shows the batch pipeline (collect, analyse, single
    result); the lower panel shows events flowing into continuous analysis.
    The figure is saved as ``q20_batch_vs_streaming.png``.
    """
    fig, (batch_ax, stream_ax) = plt.subplots(2, 1, figsize=(9, 5))
    fig.suptitle('Batch vs Streaming — dwa modele przetwarzania',
                 fontsize=FS_TITLE, fontweight='bold')

    # Both panels use the same blank 12 x 3 canvas.
    panel_titles = ((batch_ax, 'BATCH (wsadowe)'),
                    (stream_ax, 'STREAMING (strumieniowe)'))
    for panel, heading in panel_titles:
        panel.set_xlim(0, 12)
        panel.set_ylim(0, 3)
        panel.set_aspect('auto')
        panel.axis('off')
        panel.set_title(heading, fontsize=FS_LABEL, fontweight='bold')

    # --- Batch: three stages joined by thick arrows ---
    # (arrow start x or None, box left, box width, label, fill)
    batch_stages = (
        (None, 0.5, 3.0, 'Zbierz WSZYSTKIE\ndane\n(godziny / dni)', GRAY1),
        (3.5, 4.5, 2.5, 'Analiza\n(batch job)', GRAY2),
        (7.0, 8.0, 2.5, 'Wynik\n(jednorazowy)', GRAY3),
    )
    for arrow_from, left, width, label, shade in batch_stages:
        if arrow_from is not None:
            draw_arrow(batch_ax, arrow_from, 1.5, left, 1.5, lw=2)
        draw_box(batch_ax, left, 0.8, width, 1.4, label,
                 fill=shade, fontsize=FS, fontweight='bold')
    batch_ax.text(11.0, 1.5, 'min-h', fontsize=FS, va='center', fontweight='bold')

    # --- Streaming: individual events flowing in ---
    event_lefts = [0.5, 1.5, 2.5, 3.5]
    for number, left in enumerate(event_lefts, start=1):
        draw_box(stream_ax, left, 1.0, 0.8, 0.8, f'e{number}', fill=GRAY4,
                 fontsize=FS, fontweight='bold', rounded=False)
        # Every event except the last one points at its successor.
        if number < len(event_lefts):
            draw_arrow(stream_ax, left + 0.8, 1.4, left + 1.0, 1.4, lw=1)

    stream_ax.text(4.8, 1.4, '...', fontsize=FS_LABEL, va='center')
    draw_arrow(stream_ax, 5.2, 1.4, 5.8, 1.4, lw=2)

    draw_box(stream_ax, 5.8, 0.8, 2.8, 1.4, 'Analiza\nCIĄGŁA\n(event-by-event)',
             fill=GRAY2, fontsize=FS, fontweight='bold')
    draw_arrow(stream_ax, 8.6, 1.5, 9.3, 1.5, lw=2)
    draw_box(stream_ax, 9.3, 0.8, 2.0, 1.4, 'Wyniki\nciągłe',
             fill=GRAY3, fontsize=FS, fontweight='bold')
    stream_ax.text(11.5, 0.5, 'ms-s', fontsize=FS, va='center', fontweight='bold')

    # Incoming arrow on the far left marks the unbounded event stream.
    draw_arrow(stream_ax, -0.3, 1.4, 0.2, 1.4, lw=1.5)
    stream_ax.text(0.0, 2.3, '∞ zdarzeń', fontsize=FS_SMALL, ha='center',
                   style='italic')

    # Leave room at the top for the figure-level title.
    fig.tight_layout(rect=[0, 0, 1, 0.92])
    save_fig(fig, 'q20_batch_vs_streaming.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 2. All 4 window types (TSSG)
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_window_types():
    """Render the four window kinds (tumbling, sliding, session, global).

    Each kind gets its own panel with a time axis, event markers and the
    window rectangles laid over them.  The figure is saved as
    ``q20_window_types.png``.
    """
    fig, (tumbling_ax, sliding_ax, session_ax, global_ax) = plt.subplots(
        4, 1, figsize=(9, 10))
    fig.suptitle('4 typy okien — TSSG', fontsize=FS_TITLE, fontweight='bold')

    def prepare(panel, y_top, heading):
        # Blank canvas, bold title and a left-to-right time axis.
        panel.set_xlim(0, 14)
        panel.set_ylim(0, y_top)
        panel.set_aspect('auto')
        panel.axis('off')
        panel.set_title(heading, fontsize=FS_LABEL, fontweight='bold')
        draw_arrow(panel, 0.3, 1.0, 13.5, 1.0, lw=1.5)
        panel.text(13.5, 0.6, 'czas', fontsize=FS_SMALL, ha='center')

    def add_window(panel, corner, width, height, boxstyle, shade, **extra):
        # Rounded rectangle with the standard outline colour.
        panel.add_patch(FancyBboxPatch(corner, width, height, boxstyle=boxstyle,
                                       facecolor=shade, edgecolor=LN, **extra))

    def event_dot(panel, x):
        panel.plot(x, 1.0, 'ko', markersize=5)

    def hint_style():
        # A fresh dict per call so no text artist shares its bbox settings.
        return dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5)

    # Event numbers shown on the tumbling and sliding timelines.
    events = list(range(1, 13))

    # --- Tumbling ---
    prepare(tumbling_ax, 4,
            'Tumbling Window (okno przerzutne) — rozłączne, stały rozmiar')

    for pos, number in enumerate(events):
        x = 1.0 + pos * 1.0
        event_dot(tumbling_ax, x)
        tumbling_ax.text(x, 0.5, f'e{number}', fontsize=FS_SMALL, ha='center')

    # Four back-to-back windows, three events each, alternating shades.
    for w, shade in enumerate([GRAY1, GRAY3, GRAY1, GRAY3]):
        left = 1.0 + w * 3.0 - 0.3
        add_window(tumbling_ax, (left, 1.5), 3.0, 1.2, "round,pad=0.1", shade,
                   lw=1.5)
        tumbling_ax.text(left + 1.5, 2.1, f'Okno {w+1}', fontsize=FS,
                         ha='center', fontweight='bold')
        # Dashed drop lines tie each event to its window.
        for j in range(3):
            ex = 1.0 + w * 3.0 + j * 1.0
            tumbling_ax.plot([ex, ex], [1.0, 1.5], color=LN, lw=0.8,
                             linestyle='--')

    tumbling_ax.text(7.0, 3.2,
                     'Każde zdarzenie → DOKŁADNIE 1 okno. Zero nakładania.',
                     fontsize=FS, ha='center', style='italic',
                     bbox=hint_style())

    # --- Sliding ---
    prepare(sliding_ax, 5,
            'Sliding Window (okno przesuwne) — nakładające, stały rozmiar + krok')

    for pos, number in enumerate(events[:8]):
        x = 1.0 + pos * 1.0
        event_dot(sliding_ax, x)
        sliding_ax.text(x, 0.5, f'e{number}', fontsize=FS_SMALL, ha='center')

    # size=4, slide=2: each window starts two units later and is drawn one
    # level higher so the overlap stays visible.
    for w, shade in enumerate([GRAY1, GRAY2, GRAY3]):
        left = 0.7 + w * 2.0
        bottom = 1.5 + w * 0.9
        add_window(sliding_ax, (left, bottom), 4.0, 0.7, "round,pad=0.08",
                   shade, lw=1.5, alpha=0.7)
        sliding_ax.text(left + 2.0, bottom + 0.35, f'Okno {w+1} (size=4)',
                        fontsize=FS_SMALL, ha='center', fontweight='bold')

    sliding_ax.text(10.5, 3.5, 'krok=2\nNakładanie!\ne3,e4 → w oknie 1 i 2',
                    fontsize=FS, ha='center', style='italic',
                    bbox=hint_style())

    # --- Session ---
    prepare(session_ax, 4,
            'Session Window (okno sesji) — dynamiczny rozmiar, gap = przerwa')

    # First burst of activity.
    for x in [1.0, 1.8, 2.3, 3.0]:
        event_dot(session_ax, x)

    # Idle gap separating the two bursts.
    draw_arrow(session_ax, 4.0, 0.7, 7.0, 0.7, lw=1, style='<->')
    session_ax.text(5.5, 0.3, 'GAP > timeout', fontsize=FS, ha='center',
                    fontweight='bold', style='italic')

    # Second burst of activity.
    for x in [8.0, 8.8, 9.5]:
        event_dot(session_ax, x)

    # (left, width, fill, label x, label)
    sessions = (
        (0.7, 2.6, GRAY1, 2.0, 'Sesja 1\n(4 zdarzenia)'),
        (7.7, 2.1, GRAY3, 8.75, 'Sesja 2\n(3 zdarzenia)'),
    )
    for left, width, shade, label_x, label in sessions:
        add_window(session_ax, (left, 1.4), width, 1.0, "round,pad=0.1", shade,
                   lw=1.5)
        session_ax.text(label_x, 1.9, label, fontsize=FS, ha='center',
                        fontweight='bold')

    session_ax.text(5.5, 3.0, 'Nowa sesja po przerwie > gap',
                    fontsize=FS, ha='center', style='italic')

    # --- Global ---
    prepare(global_ax, 4,
            'Global Window — jedno okno na cały strumień + trigger')

    for pos in range(12):
        event_dot(global_ax, 1.0 + pos * 1.0)

    # A single window spanning the whole timeline.
    add_window(global_ax, (0.5, 1.4), 12.5, 1.0, "round,pad=0.1", GRAY1, lw=2)
    global_ax.text(6.75, 1.9, 'GLOBAL WINDOW (cały strumień)', fontsize=FS,
                   ha='center', fontweight='bold')

    # Dashed markers where the trigger emits a result.
    for tx in [4.0, 8.0, 12.0]:
        global_ax.plot([tx, tx], [1.4, 2.4], color=LN, lw=2, linestyle='--')
        global_ax.text(tx, 2.7, 'EMIT', fontsize=FS_SMALL, ha='center',
                       fontweight='bold',
                       bbox=dict(boxstyle='round,pad=0.1', facecolor=GRAY3,
                                 edgecolor=LN))

    global_ax.text(6.75, 3.3,
                   'Trigger decyduje kiedy emitować (np. co N zdarzeń)',
                   fontsize=FS, ha='center', style='italic')

    fig.tight_layout(rect=[0, 0, 1, 0.94])
    save_fig(fig, 'q20_window_types.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 3. Event Time vs Processing Time scatter + watermark
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_event_vs_processing_time():
    """Render the event-time vs processing-time scatter plus watermark panel.

    Left panel: the ideal diagonal against delayed, partly out-of-order
    events.  Right panel: the same events with a watermark line and one
    highlighted late event.  Saved as ``q20_event_vs_processing_time.png``.
    """
    fig, (reality_ax, watermark_ax) = plt.subplots(1, 2, figsize=(11, 5))
    fig.suptitle('Event Time vs Processing Time + Watermark',
                 fontsize=FS_TITLE, fontweight='bold')

    # Both panels share the same square, tick-free 10 x 10 frame.
    frames = ((reality_ax, 'Idealny vs Realny świat'),
              (watermark_ax, 'Watermark — granica postępu'))
    for panel, heading in frames:
        panel.set_xlim(0, 10)
        panel.set_ylim(0, 10)
        panel.set_aspect('equal')
        panel.set_xlabel('Event Time', fontsize=FS_LABEL)
        panel.set_ylabel('Processing Time', fontsize=FS_LABEL)
        panel.set_title(heading, fontsize=FS_LABEL, fontweight='bold')
        panel.set_xticks([])
        panel.set_yticks([])

    # --- Panel 1: ideal vs real ---
    # On the diagonal an event is processed at the moment it happens.
    reality_ax.plot([0, 9], [0, 9], 'k--', lw=1.5, label='ideał (brak opóźnień)')

    # Fixed seed so the scatter is the same on every run.  Every event is
    # processed after it occurs: a non-negative delay is added to its time.
    np.random.seed(42)
    event_times = np.sort(np.random.uniform(1, 8, 15))
    proc_times = event_times + np.random.exponential(0.5, 15)

    # Delay three events further so they arrive visibly out of order.
    delayed = [3, 7, 11]
    proc_times[delayed] += 1.5

    reality_ax.scatter(event_times, proc_times, c='black', s=30, zorder=5,
                       label='zdarzenia (realne)')

    for ev_t, pr_t in zip(event_times[delayed], proc_times[delayed]):
        reality_ax.annotate('out-of-order', xy=(ev_t, pr_t),
                            xytext=(ev_t + 0.8, pr_t + 0.5),
                            fontsize=FS_SMALL, ha='left',
                            arrowprops=dict(arrowstyle='->', lw=0.8,
                                            color='#555'))

    reality_ax.legend(fontsize=FS_SMALL, loc='upper left')
    reality_ax.text(7, 2, 'Opóźnienie\nsieciowe ↑', fontsize=FS, ha='center',
                    style='italic', color='#555')

    # --- Panel 2: watermark concept ---
    watermark_ax.scatter(event_times, proc_times, c='black', s=30, zorder=5)

    # The watermark runs 0.3 above the ideal diagonal; the area underneath
    # it is shaded.
    wm_x = np.linspace(0, 9, 50)
    wm_y = wm_x + 0.3
    watermark_ax.plot(wm_x, wm_y, 'k-', lw=2.5, label='Watermark')
    watermark_ax.fill_between(wm_x, 0, wm_y, alpha=0.15, color='gray')

    watermark_ax.text(2.0, 1.0, 'PONIŻEJ watermark:\n„na pewno dotarło"',
                      fontsize=FS, ha='center', fontweight='bold',
                      bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4,
                                edgecolor=GRAY5))

    # One of the delayed events is redrawn as a hollow marker: late data.
    late_x = event_times[7]
    late_y = proc_times[7]
    watermark_ax.scatter([late_x], [late_y], c='white', s=80, zorder=6,
                         edgecolors='black', linewidths=2)
    watermark_ax.annotate('LATE DATA!\n(po watermarku)', xy=(late_x, late_y),
                          xytext=(late_x + 1.2, late_y + 0.8),
                          fontsize=FS_SMALL, ha='left', fontweight='bold',
                          arrowprops=dict(arrowstyle='->', lw=1, color=LN))

    watermark_ax.legend(fontsize=FS_SMALL, loc='upper left')

    fig.tight_layout(rect=[0, 0, 1, 0.92])
    save_fig(fig, 'q20_event_vs_processing_time.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 4. Tumbling window example — fraud detection
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_tumbling_fraud():
    """Render the tumbling-window fraud-detection example.

    Two consecutive one-minute windows are drawn side by side: one with a
    normal transaction count and one with a count that raises an alert.
    Saved as ``q20_tumbling_fraud.png``.
    """
    fig, ax = plt.subplots(figsize=(9, 4))
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 5.5)
    ax.set_aspect('auto')
    ax.axis('off')
    ax.set_title('Tumbling Window — fraud detection (okno = 1 min)',
                 fontsize=FS_TITLE, fontweight='bold', pad=10)

    # Time axis
    draw_arrow(ax, 0.5, 1.0, 11.5, 1.0, lw=1.5)
    ax.text(6.0, 0.4, 'czas', fontsize=FS, ha='center')

    # One entry per window; coordinates are spelled out literally.
    windows = (
        {   # normal traffic
            'left': 1.0, 'mid': 3.25, 'row_left': 1.3,
            'frame_fill': GRAY4, 'row_fill': GRAY1,
            'span': '[14:00 — 14:01]',
            'txns': ['Sklep A: 50 zł', 'Sklep B: 30 zł', 'Stacja: 80 zł'],
            'verdict': 'count = 3 → OK',
            'ink': '#2E7D32', 'paper': '#E8F5E9',
        },
        {   # suspicious burst
            'left': 6.0, 'mid': 8.25, 'row_left': 6.3,
            'frame_fill': GRAY1, 'row_fill': GRAY3,
            'span': '[14:01 — 14:02]',
            'txns': ['ATM Warszawa: 500 zł', 'ATM Kraków: 500 zł',
                     '... +45 transakcji'],
            'verdict': 'count = 47 → ALERT!',
            'ink': '#C62828', 'paper': '#F8D7DA',
        },
    )

    for win in windows:
        # Window frame and its time range.
        draw_box(ax, win['left'], 1.5, 4.5, 3.0, '', fill=win['frame_fill'],
                 rounded=True, lw=2)
        ax.text(win['mid'], 4.2, win['span'], fontsize=FS_LABEL, ha='center',
                fontweight='bold')
        # Transactions stacked from the top of the frame downwards.
        for row, txn in enumerate(win['txns']):
            draw_box(ax, win['row_left'], 3.3 - row * 0.55, 4.0, 0.45, txn,
                     fill=win['row_fill'], fontsize=FS_SMALL, rounded=False)
        # Aggregate result for the window.
        ax.text(win['mid'], 1.7, win['verdict'], fontsize=FS, ha='center',
                fontweight='bold', color=win['ink'],
                bbox=dict(boxstyle='round,pad=0.15', facecolor=win['paper'],
                          edgecolor=win['ink']))

    save_fig(fig, 'q20_tumbling_fraud.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 5. Sliding window — SLA monitoring
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_sliding_sla():
    """Render the sliding-window SLA monitoring example.

    Five per-minute latency readings are drawn as bars against a dashed
    SLA threshold; readings above the threshold are highlighted and
    labelled as alerts.  Saved as ``q20_sliding_sla.png``.
    """
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 6)
    ax.set_aspect('auto')
    ax.axis('off')
    ax.set_title('Sliding Window — monitoring SLA (okno=5min, krok=1min)',
                 fontsize=FS_TITLE, fontweight='bold', pad=10)

    # Time axis
    draw_arrow(ax, 0.5, 0.5, 11.5, 0.5, lw=1.5)

    readings = [('14:05', 120), ('14:06', 180), ('14:07', 340),
                ('14:08', 290), ('14:09', 150)]
    sla = 200
    alert_ink = '#C62828'

    for slot, (stamp, lat) in enumerate(readings):
        x = 1.5 + slot * 2.0
        ax.text(x, 0.1, stamp, fontsize=FS, ha='center')

        # Bar height is the latency scaled down by 100.
        bar_h = lat / 100.0
        if lat > sla:
            fill, edge, value_ink = '#F8D7DA', alert_ink, alert_ink
            status, status_ink = 'ALERT!', alert_ink
        else:
            fill, edge, value_ink = GRAY1, LN, LN
            status, status_ink = 'OK', '#2E7D32'

        draw_box(ax, x - 0.5, 1.0, 1.0, bar_h, '', fill=fill,
                 rounded=False, edgecolor=edge, lw=1.5)
        ax.text(x, 1.0 + bar_h + 0.15, f'{lat}ms', fontsize=FS,
                ha='center', fontweight='bold', color=value_ink)
        ax.text(x, 1.0 + bar_h + 0.55, status, fontsize=FS_SMALL,
                ha='center', fontweight='bold', color=status_ink)

    # SLA threshold, on the same scale and baseline as the bars.
    sla_y = 1.0 + sla / 100.0
    ax.plot([0.8, 11.2], [sla_y, sla_y], 'k--', lw=1.5)
    ax.text(11.3, sla_y, f'SLA={sla}ms', fontsize=FS, va='center',
            fontweight='bold')

    # Window-size bracket.
    # NOTE(review): bars sit 2.0 x-units apart per minute, so this 4-unit
    # bracket is not to scale for "5 min" - confirm it is meant as a sketch.
    draw_arrow(ax, 5.0, 5.3, 1.0, 5.3, lw=1.5, style='<->')
    ax.text(3.0, 5.6, 'okno = 5 min', fontsize=FS, ha='center',
            fontweight='bold')

    # Step bracket (one bar spacing wide).
    draw_arrow(ax, 5.0, 4.8, 3.0, 4.8, lw=1, style='<->', color='#555')
    ax.text(4.0, 4.4, 'krok = 1 min\n(nakładanie!)', fontsize=FS_SMALL,
            ha='center', style='italic')

    save_fig(fig, 'q20_sliding_sla.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 6. Session window — user sessions
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_session_users():
    """Render the session-window example for two users.

    The upper panel shows a user whose clicks fall into two sessions split
    by a long pause; the lower panel shows a user whose evenly spaced
    clicks form one session.  Saved as ``q20_session_users.png``.
    """
    fig, (anna_ax, bob_ax) = plt.subplots(2, 1, figsize=(10, 5))
    fig.suptitle('Session Window — sesje użytkowników (gap = 30 min)',
                 fontsize=FS_TITLE, fontweight='bold')

    # Same blank canvas and time axis for both users.
    for panel, heading in ((anna_ax, 'Użytkownik Anna'),
                           (bob_ax, 'Użytkownik Bob')):
        panel.set_xlim(0, 14)
        panel.set_ylim(0, 3.5)
        panel.set_aspect('auto')
        panel.axis('off')
        panel.set_title(heading, fontsize=FS_LABEL, fontweight='bold')
        draw_arrow(panel, 0.3, 1.0, 13.5, 1.0, lw=1.5)

    def clicks(panel, xs):
        # One dot per click on the time axis.
        for x in xs:
            panel.plot(x, 1.0, 'ko', markersize=6)

    def session(panel, left, width, shade, lw, label_x, label):
        # Rounded session rectangle with a bold caption.
        panel.add_patch(FancyBboxPatch((left, 1.5), width, 1.2,
                                       boxstyle="round,pad=0.1",
                                       facecolor=shade, edgecolor=LN, lw=lw))
        panel.text(label_x, 2.1, label, fontsize=FS, ha='center',
                   fontweight='bold')

    # --- Anna: two sessions ---
    clicks(anna_ax, [1.0, 1.8, 2.5, 3.2])
    clicks(anna_ax, [9.0, 9.8, 10.5])

    session(anna_ax, 0.7, 2.8, GRAY1, 1.5, 2.1, 'Sesja 1\n4 kliknięcia, 12 min')
    session(anna_ax, 8.7, 2.1, GRAY3, 1.5, 9.75, 'Sesja 2\n3 kliknięcia, 8 min')

    # Silence between the two bursts.
    draw_arrow(anna_ax, 3.8, 0.5, 8.5, 0.5, lw=1.5, style='<->')
    anna_ax.text(6.15, 0.1, 'cisza 45 min > gap(30)', fontsize=FS, ha='center',
                 fontweight='bold', style='italic')

    # --- Bob: one session ---
    clicks(bob_ax, [1.0, 2.5, 4.0, 5.5, 7.0, 8.5, 10.0])
    session(bob_ax, 0.7, 9.6, GRAY1, 2, 5.5,
            'Sesja 1 (ciągła) — 7 kliknięć, każde < 30 min od poprzedniego')

    fig.tight_layout(rect=[0, 0, 1, 0.92])
    save_fig(fig, 'q20_session_users.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 7. Streaming ecosystem overview
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_streaming_ecosystem():
    """Render the source -> engine -> sink overview of streaming platforms.

    One source box feeds three processing engines, each with its own sink
    box.  Saved as ``q20_streaming_ecosystem.png``.
    """
    fig, ax = plt.subplots(figsize=(10, 5.5))
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 7)
    ax.set_aspect('auto')
    ax.axis('off')
    ax.set_title('Ekosystem przetwarzania strumieniowego',
                 fontsize=FS_TITLE, fontweight='bold', pad=10)

    # Source
    draw_box(ax, 0.3, 2.5, 2.0, 3.0, 'Kafka\nTopics\n(źródło)', fill=GRAY2,
             fontsize=FS, fontweight='bold')

    # Engines: every engine is fed from the middle of the source's right edge.
    engine_rows = (
        (4.7, 'Kafka Streams\n(library w JVM)', GRAY1),
        (3.2, 'Apache Flink\n(klaster)', GRAY3),
        (1.7, 'Spark Streaming\n(klaster)', GRAY5),
    )
    for bottom, caption, shade in engine_rows:
        draw_box(ax, 4.0, bottom, 3.0, 1.2, caption, fill=shade,
                 fontsize=FS, fontweight='bold')
        draw_arrow(ax, 2.3, 4.0, 4.0, bottom + 0.6, lw=1.5)

    # Sinks: one per engine, on the same rows, joined by horizontal arrows.
    sink_rows = (
        (4.7, 'Kafka topic\n/ baza danych'),
        (3.2, 'DB / Kafka\n/ S3'),
        (1.7, 'HDFS / DB\n/ dashboard'),
    )
    for bottom, caption in sink_rows:
        draw_box(ax, 8.5, bottom, 2.5, 1.2, caption, fill=GRAY4,
                 fontsize=FS)
        mid_y = bottom + 0.6
        draw_arrow(ax, 7.0, mid_y, 8.5, mid_y, lw=1.5)

    # Column labels
    for lx, ly, caption in ((1.3, 6.0, 'ŹRÓDŁO'),
                            (5.5, 6.2, 'SILNIK'),
                            (9.75, 6.2, 'WYNIK')):
        ax.text(lx, ly, caption, fontsize=FS_LABEL, ha='center',
                fontweight='bold')

    # Latency notes, each placed just above its engine box.
    for ly, note in ((5.95, '~1-10 ms'), (4.5, '<10 ms'), (3.0, '~100 ms')):
        ax.text(5.5, ly, note, fontsize=FS_SMALL, ha='center', style='italic')

    save_fig(fig, 'q20_streaming_ecosystem.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 8. True streaming vs Micro-batch
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_true_vs_microbatch():
    """Render the comparison of true streaming and micro-batching.

    Upper panel: every event yields its own result.  Lower panel: events
    are grouped into batches of three and each batch yields one result.
    Saved as ``q20_true_vs_microbatch.png``.
    """
    fig, (stream_ax, batch_ax) = plt.subplots(2, 1, figsize=(10, 5.5))
    fig.suptitle('True Streaming vs Micro-Batch',
                 fontsize=FS_TITLE, fontweight='bold')

    headings = (
        (stream_ax, 'TRUE STREAMING (Flink, Kafka Streams) — event-by-event'),
        (batch_ax, 'MICRO-BATCH (Spark Streaming) — grupami co ~100ms'),
    )
    for panel, heading in headings:
        panel.set_xlim(0, 12)
        panel.set_ylim(0, 3.5)
        panel.set_aspect('auto')
        panel.axis('off')
        panel.set_title(heading, fontsize=FS_LABEL, fontweight='bold')

    def latency_note(panel, message):
        # Boxed latency summary at the right edge of a panel.
        panel.text(11.5, 1.3, message, fontsize=FS, ha='center',
                   fontweight='bold',
                   bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4,
                             edgecolor=GRAY5))

    # --- True streaming: event -> result, one by one ---
    for number in range(1, 7):
        x = 1.0 + (number - 1) * 1.8
        mid = x + 0.4
        draw_box(stream_ax, x, 2.0, 0.8, 0.7, f'e{number}', fill=GRAY1,
                 fontsize=FS, fontweight='bold', rounded=False)
        draw_arrow(stream_ax, mid, 2.0, mid, 1.4, lw=1)
        draw_box(stream_ax, x, 0.5, 0.8, 0.7, f'r{number}', fill=GRAY3,
                 fontsize=FS, fontweight='bold', rounded=False)
        stream_ax.text(mid, 1.6, '~ms', fontsize=5, ha='center', color='#555')

    latency_note(stream_ax, 'Latencja:\n< 10 ms')

    # --- Micro-batch: three events per batch, one result per batch ---
    for b, shade in enumerate([GRAY1, GRAY2, GRAY3]):
        bx = 0.8 + b * 3.5
        # Batch container and its caption.
        draw_box(batch_ax, bx, 1.8, 3.0, 1.0, '', fill=shade,
                 rounded=True, lw=1.5)
        batch_ax.text(bx + 1.5, 2.6, f'Batch {b+1}', fontsize=FS,
                      ha='center', fontweight='bold')
        # Events inside the batch are numbered continuously across batches.
        for j in range(3):
            ex = bx + 0.3 + j * 0.9
            draw_box(batch_ax, ex, 2.0, 0.7, 0.5, f'e{b*3+j+1}', fill='white',
                     fontsize=FS_SMALL, rounded=False)

        draw_arrow(batch_ax, bx + 1.5, 1.8, bx + 1.5, 1.2, lw=1.5)
        draw_box(batch_ax, bx + 0.5, 0.4, 2.0, 0.7, f'result {b+1}', fill=GRAY4,
                 fontsize=FS, fontweight='bold')

    latency_note(batch_ax, 'Latencja:\n~100ms–s')

    fig.tight_layout(rect=[0, 0, 1, 0.92])
    save_fig(fig, 'q20_true_vs_microbatch.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 9. Platform comparison table
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_platform_comparison():
    """Render the feature comparison table of the three streaming platforms.

    The table has a feature column followed by one column per platform and
    is saved as ``q20_platform_comparison.png``.
    """
    fig, ax = plt.subplots(figsize=(9, 5))
    ax.set_xlim(0, 11.5)
    ax.set_ylim(-6, 1)
    ax.set_aspect('auto')
    ax.axis('off')
    ax.set_title('Porównanie platform strumieniowych',
                 fontsize=FS_TITLE, fontweight='bold', pad=10)

    # (feature, Kafka Streams, Apache Flink, Spark Streaming)
    comparison = (
        ('Model', 'event-by-event', 'event-by-event', 'micro-batch (~100ms)'),
        ('Deployment', 'library (w JVM)', 'klaster', 'klaster'),
        ('Latencja', '~1–10 ms', '< 10 ms', '100 ms – sekundy'),
        ('Exactly-once', 'Kafka TXN', 'checkpointing', 'WAL'),
        ('State', 'RocksDB local', 'RocksDB + ckpt', 'in-memory / ext'),
        ('Okna', 'T, S, Session', 'wszystkie + custom', 'T, S'),
        ('Use case', 'Kafka → Kafka', 'złożona analityka', 'ETL + ML / SQL'),
    )
    table_rows = [list(entry) for entry in comparison]
    column_titles = ['Cecha', 'Kafka Streams', 'Apache Flink', 'Spark Streaming']
    column_widths = [2.5, 2.8, 2.8, 2.8]

    # The header row sits at y=0.5; data rows grow downwards from there.
    draw_table(ax, column_titles, table_rows, x0=0.25, y0=0.5,
               col_widths=column_widths, row_h=0.6, fontsize=7,
               header_fontsize=8)

    save_fig(fig, 'q20_platform_comparison.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 10. Kafka Streams architecture
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_kafka_streams_arch():
    """Render the Kafka Streams architecture diagram.

    A consumer -> processing -> producer pipeline and a local RocksDB state
    store are drawn inside one frame representing the application's JVM.
    Saved as ``q20_kafka_streams_arch.png``.
    """
    fig, ax = plt.subplots(figsize=(9, 5))
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 7)
    ax.set_aspect('auto')
    ax.axis('off')
    ax.set_title('Kafka Streams — architektura (library w JVM)',
                 fontsize=FS_TITLE, fontweight='bold', pad=10)

    # Outer frame: the user's own Java application.
    draw_box(ax, 0.5, 0.5, 11.0, 5.5, '', fill=GRAY4, rounded=True, lw=2.5)
    ax.text(6.0, 5.7, 'Twoja aplikacja Java (JVM)', fontsize=FS_LABEL,
            ha='center', fontweight='bold')

    # Pipeline stages, left to right: consumer, processing, producer.
    pipeline = (
        (1.0, 'Kafka\nConsumer\n(input topic)', GRAY1),
        (4.5, 'Kafka Streams\n(logika\nbiznesowa)', GRAY2),
        (8.0, 'Kafka\nProducer\n(output topic)', GRAY1),
    )
    for left, caption, shade in pipeline:
        draw_box(ax, left, 3.0, 2.5, 1.5, caption,
                 fill=shade, fontsize=FS, fontweight='bold')

    # Arrows between neighbouring stages.
    for start_x, end_x in ((3.5, 4.5), (7.0, 8.0)):
        draw_arrow(ax, start_x, 3.75, end_x, 3.75, lw=2)

    # RocksDB state store, attached to the processing stage by a plain line.
    draw_box(ax, 4.5, 1.0, 2.5, 1.3, 'RocksDB\n(stan lokalny)',
             fill=GRAY3, fontsize=FS, fontweight='bold')
    ax.plot([5.75, 5.75], [3.0, 2.3], color=LN, lw=1.5)
    ax.text(7.3, 1.6, 'okna, joiny,\nagregacje', fontsize=FS_SMALL,
            style='italic', va='center')

    # Key message
    takeaway = ('NIE potrzebujesz osobnego klastra! '
                'Skalujesz = więcej instancji JVM.')
    ax.text(6.0, 0.2, takeaway, fontsize=FS, ha='center',
            fontweight='bold',
            bbox=dict(boxstyle='round,pad=0.2', facecolor='white',
                      edgecolor=LN))

    save_fig(fig, 'q20_kafka_streams_arch.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 11. Flink architecture + checkpointing
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_flink_arch():
    """Draw the Apache Flink cluster + checkpointing diagram.

    The figure shows a cluster frame containing a Job Manager, a Task Manager
    band with four slots, a checkpoint-storage box and a small
    source -> |B| -> map -> |B| -> sink strip illustrating barriers.
    The finished figure is handed to ``save_fig`` under the name
    ``q20_flink_arch.png``.

    Layout notes (all coordinates are in the 12 x 8 data space set below):
    the barrier strip starts at x=0.5 so that its last box ends at x=5.3,
    clear of the checkpoint-storage box that begins at x=5.5, and the
    ``Barrier:`` caption sits above the strip instead of on top of it.
    """
    fig, ax = plt.subplots(figsize=(9, 6))
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 8)
    ax.set_aspect('auto')
    ax.axis('off')
    ax.set_title('Apache Flink — architektura klastra + checkpointing',
                 fontsize=FS_TITLE, fontweight='bold', pad=10)

    # Cluster border
    draw_box(ax, 0.3, 1.0, 11.4, 6.2, '', fill=GRAY4, rounded=True, lw=2.5)
    ax.text(6.0, 6.95, 'FLINK CLUSTER', fontsize=FS_LABEL,
            ha='center', fontweight='bold')

    # Job Manager
    draw_box(ax, 1.0, 5.5, 3.0, 1.2, 'Job Manager\n(koordynacja,\ncheckpointy)',
             fill=GRAY2, fontsize=FS, fontweight='bold')

    # Task Managers: one wide band holding the individual slots
    draw_box(ax, 1.0, 3.0, 10.0, 2.0, '', fill='white', rounded=True, lw=1.5)
    ax.text(6.0, 4.7, 'Task Managers (workery)', fontsize=FS,
            ha='center', fontweight='bold')

    slots = ['source\n& map()', 'map()', 'window()\n& reduce', 'sink()']
    for i, s in enumerate(slots):
        x = 1.5 + i * 2.4
        draw_box(ax, x, 3.3, 2.0, 1.2, f'Slot {i+1}\n{s}', fill=GRAY1,
                 fontsize=FS_SMALL, fontweight='bold')

    # Job Manager -> Task Managers
    draw_arrow(ax, 2.5, 5.5, 6.0, 5.0, lw=1.5, style='->')
    ax.text(5.0, 5.5, 'przydziela\npodzadania', fontsize=FS_SMALL,
            style='italic')

    # Checkpoint storage, linked to the slot row by a dashed line
    draw_box(ax, 5.5, 1.2, 3.5, 1.2, 'Checkpoint Storage\n(HDFS / S3)',
             fill=GRAY3, fontsize=FS, fontweight='bold')
    ax.plot([7.25, 7.25], [2.4, 3.3], color=LN, lw=1.5, linestyle='--')
    ax.text(8.0, 2.7, 'snapshoty\nstanu', fontsize=FS_SMALL, style='italic')

    # Barrier concept at bottom.
    # The caption is placed above the strip (the strip occupies y 1.3..1.75);
    # at its former position (3.0, 1.6) it was drawn over the 'map' box.
    strip_x0 = 0.5
    ax.text(strip_x0, 1.95, 'Barrier:', fontsize=FS, fontweight='bold')
    barrier_boxes = ['source', '|B|', 'map', '|B|', 'sink']
    bx = strip_x0
    for b in barrier_boxes:
        if b == '|B|':
            # Barrier marker: a small boxed label preceded by a short arrow.
            ax.text(bx + 0.3, 1.5, b, fontsize=FS, ha='center',
                    fontweight='bold',
                    bbox=dict(boxstyle='round,pad=0.1', facecolor=GRAY5,
                              edgecolor=LN))
            draw_arrow(ax, bx, 1.5, bx + 0.1, 1.5, lw=1)
            bx += 0.7
        else:
            draw_box(ax, bx, 1.3, 1.0, 0.45, b, fill=GRAY1,
                     fontsize=FS_SMALL, fontweight='bold')
            bx += 1.2

    save_fig(fig, 'q20_flink_arch.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 12. Spark Streaming architecture
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_spark_streaming_arch():
    """Draw the Spark Streaming (micro-batch) architecture diagram.

    A cluster frame contains a driver box, three stacked
    batch -> map/reduce -> result rows and a box for the shared
    Spark SQL / MLlib ecosystem; a boxed one-line summary sits below the
    frame. The figure is passed to ``save_fig`` as
    ``q20_spark_streaming_arch.png``.
    """
    figure, axis = plt.subplots(figsize=(9, 5))
    axis.set_xlim(0, 12)
    axis.set_ylim(0, 7)
    axis.set_aspect('auto')
    axis.axis('off')
    axis.set_title('Spark Streaming — architektura (micro-batch)',
                   fontsize=FS_TITLE, fontweight='bold', pad=10)

    # Frame standing for the whole cluster, captioned near its top edge.
    draw_box(axis, 0.3, 0.5, 11.4, 5.8, '', fill=GRAY4, rounded=True, lw=2.5)
    axis.text(6.0, 6.0, 'SPARK CLUSTER', fontsize=FS_LABEL,
              ha='center', fontweight='bold')

    # Driver box and the arrow leaving it towards the batch column.
    draw_box(axis, 1.0, 4.5, 3.0, 1.2, 'Driver\n(planuje mini-batche)',
             fill=GRAY2, fontsize=FS, fontweight='bold')
    draw_arrow(axis, 2.5, 4.5, 6.0, 4.0, lw=1.5)

    # One row per micro-batch: batch box -> map/reduce box -> result box.
    batch_labels = (
        'batch 1\n(e1,e2,e3)',
        'batch 2\n(e4,e5,e6)',
        'batch 3\n(e7,e8,e9)',
    )
    for row, label in enumerate(batch_labels):
        bottom = 2.8 - row * 1.0
        middle = bottom + 0.4  # vertical centre of the 0.8-high row
        draw_box(axis, 4.5, bottom, 2.5, 0.8, label, fill=GRAY1,
                 fontsize=FS_SMALL, fontweight='bold')
        draw_arrow(axis, 7.0, middle, 7.5, middle, lw=1)
        draw_box(axis, 7.5, bottom, 1.3, 0.8, 'map→\nreduce', fill=GRAY3,
                 fontsize=5.5)
        draw_arrow(axis, 8.8, middle, 9.3, middle, lw=1)
        draw_box(axis, 9.3, bottom, 1.5, 0.8, 'result ' + str(row + 1),
                 fill='white', fontsize=FS_SMALL)

    # Ecosystem box in the lower-left corner of the frame.
    draw_box(axis, 1.0, 1.0, 3.0, 1.0,
             'Spark SQL / MLlib\n(ten sam ekosystem!)',
             fill=GRAY5, fontsize=FS, fontweight='bold')

    # Boxed summary line under the diagram.
    summary_frame = dict(boxstyle='round,pad=0.2', facecolor='white',
                         edgecolor=LN)
    axis.text(6.0, 0.3,
              'ZALETA: batch API | WADA: latencja ≥ batch interval (~100ms)',
              fontsize=FS, ha='center', fontweight='bold',
              bbox=summary_frame)

    save_fig(figure, 'q20_spark_streaming_arch.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 13. Lambda vs Kappa architecture
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_lambda_vs_kappa():
    """Draw the two-panel Lambda vs Kappa architecture comparison.

    The upper panel shows the Lambda layout (source feeding a batch layer and
    a speed layer whose results are merged); the lower panel shows the Kappa
    layout (a single streaming layer with a dashed replay arrow). The figure
    is passed to ``save_fig`` as ``q20_lambda_vs_kappa.png``.
    """
    def _blank_panel(panel, y_max, heading):
        # Shared axes setup: 12-unit-wide canvas, no frame, bold panel title.
        panel.set_xlim(0, 12)
        panel.set_ylim(0, y_max)
        panel.set_aspect('auto')
        panel.axis('off')
        panel.set_title(heading, fontsize=FS_LABEL, fontweight='bold')

    figure, panels = plt.subplots(2, 1, figsize=(10, 7))
    figure.suptitle('Architektura Lambda vs Kappa',
                    fontsize=FS_TITLE, fontweight='bold')
    note_frame = dict(boxstyle='round,pad=0.2', facecolor=GRAY4,
                      edgecolor=GRAY5)

    # ---------------- Lambda (upper panel) ----------------
    lam = panels[0]
    _blank_panel(lam, 5, 'LAMBDA — 2 ścieżki (batch + speed)')

    # Data source on the left.
    draw_box(lam, 0.3, 1.8, 2.0, 1.5, 'Źródło\ndanych', fill=GRAY2,
             fontsize=FS, fontweight='bold')

    # Upper path: batch layer.
    draw_box(lam, 3.5, 3.3, 3.0, 1.2,
             'Batch Layer\n(Spark)\nprzelicza co godzinę',
             fill=GRAY1, fontsize=FS_SMALL, fontweight='bold')
    draw_arrow(lam, 2.3, 3.0, 3.5, 3.9, lw=1.5)

    # Lower path: speed layer.
    draw_box(lam, 3.5, 0.8, 3.0, 1.2, 'Speed Layer\n(Flink)\nreal-time',
             fill=GRAY3, fontsize=FS_SMALL, fontweight='bold')
    draw_arrow(lam, 2.3, 2.2, 3.5, 1.4, lw=1.5)

    # Result boxes for each path.
    draw_box(lam, 7.5, 3.3, 2.0, 1.2, 'Dokładne\nwyniki\n(wolne)',
             fill=GRAY4, fontsize=FS_SMALL)
    draw_arrow(lam, 6.5, 3.9, 7.5, 3.9, lw=1.5)

    draw_box(lam, 7.5, 0.8, 2.0, 1.2, 'Przybliżone\nwyniki\n(szybkie)',
             fill=GRAY4, fontsize=FS_SMALL)
    draw_arrow(lam, 6.5, 1.4, 7.5, 1.4, lw=1.5)

    # Both result boxes feed the merge box on the right.
    draw_box(lam, 10.0, 2.0, 1.5, 1.5, 'MERGE\n→ UI', fill=GRAY5,
             fontsize=FS, fontweight='bold')
    draw_arrow(lam, 9.5, 3.5, 10.0, 3.0, lw=1.5)
    draw_arrow(lam, 9.5, 1.8, 10.0, 2.5, lw=1.5)

    lam.text(6.0, 0.1, '2 systemy, 2 kody — złożone ale pewne',
             fontsize=FS, ha='center', style='italic', bbox=note_frame)

    # ---------------- Kappa (lower panel) ----------------
    kap = panels[1]
    _blank_panel(kap, 4, 'KAPPA — 1 ścieżka (streaming only)')

    # Data source on the left.
    draw_box(kap, 0.3, 1.3, 2.0, 1.5, 'Źródło\ndanych', fill=GRAY2,
             fontsize=FS, fontweight='bold')

    # The single streaming layer.
    draw_box(kap, 3.5, 1.3, 3.5, 1.5,
             'Streaming Layer\n(Flink)\n+ replay z Kafka log',
             fill=GRAY1, fontsize=FS, fontweight='bold')
    draw_arrow(kap, 2.3, 2.05, 3.5, 2.05, lw=2)

    # Output box on the right.
    draw_box(kap, 8.0, 1.3, 2.5, 1.5, 'Wyniki\n→ UI', fill=GRAY4,
             fontsize=FS, fontweight='bold')
    draw_arrow(kap, 7.0, 2.05, 8.0, 2.05, lw=2)

    # Curved dashed replay arrow under the streaming layer, with its caption.
    replay_style = dict(arrowstyle='<-', lw=1.5, color=LN,
                        connectionstyle='arc3,rad=0.3', linestyle='--')
    kap.annotate('', xy=(3.5, 1.0), xytext=(7.0, 1.0),
                 arrowprops=replay_style)
    kap.text(5.25, 0.3, 'Replay z Kafka\n(przetwórz historię od nowa)',
             fontsize=FS_SMALL, ha='center', style='italic')

    kap.text(6.0, 3.3, '1 system, 1 kod — prostsze, ale replay = dużo I/O',
             fontsize=FS, ha='center', style='italic', bbox=note_frame)

    # Leave the top 8% of the figure free for the suptitle.
    figure.tight_layout(rect=[0, 0, 1, 0.92])
    save_fig(figure, 'q20_lambda_vs_kappa.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 14. Lambda vs Kappa comparison table
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_lambda_kappa_table():
    """Draw the Lambda vs Kappa comparison table.

    Builds a three-column table (feature, Lambda, Kappa) with six data rows,
    renders it through ``draw_table`` and passes the figure to ``save_fig``
    as ``q20_lambda_kappa_table.png``.
    """
    figure, axis = plt.subplots(figsize=(8, 3.5))
    axis.set_xlim(0, 10)
    axis.set_ylim(-4.5, 1)
    axis.set_aspect('auto')
    axis.axis('off')
    axis.set_title('Lambda vs Kappa — porównanie',
                   fontsize=FS_TITLE, fontweight='bold', pad=10)

    # The table is specified column by column and zipped into rows below.
    features = ['Ścieżki', 'Kod', 'Złożoność', 'Replay', 'Spójność',
                'Przykład']
    lambda_side = ['2 (batch + speed)', '2 implementacje', 'wysoka',
                   'batch przelicza', 'merge wymagany', 'Netflix, LinkedIn']
    kappa_side = ['1 (streaming)', '1 implementacja', 'niska',
                  'Kafka replay', 'natywna', 'Uber, Confluent']

    column_titles = ['Cecha', 'Lambda', 'Kappa']
    widths = [2.5, 3.5, 3.5]
    body = [list(cells) for cells in zip(features, lambda_side, kappa_side)]

    draw_table(axis, column_titles, body, x0=0.25, y0=0.5,
               col_widths=widths, row_h=0.55, fontsize=7.5)

    save_fig(figure, 'q20_lambda_kappa_table.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 15. Exactly-once comparison
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_exactly_once():
    """Draw the exactly-once mechanisms diagram for three platforms.

    Three stacked panels are drawn top to bottom: Flink (a
    source/|B|/map()/|B|/sink() strip plus a checkpoint note), Kafka Streams
    (one line describing the transactional flow) and Spark Streaming (one
    line describing WAL + checkpointing + idempotent sinks). The figure is
    passed to ``save_fig`` as ``q20_exactly_once.png``.
    """
    figure, axis = plt.subplots(figsize=(9, 5))
    axis.set_xlim(0, 12)
    axis.set_ylim(0, 7)
    axis.set_aspect('auto')
    axis.axis('off')
    axis.set_title('Exactly-Once — mechanizmy na 3 platformach',
                   fontsize=FS_TITLE, fontweight='bold', pad=10)

    # --- Flink panel ---
    draw_box(axis, 0.3, 4.3, 11.0, 2.0, '', fill=GRAY4, rounded=True, lw=1.5)
    axis.text(1.0, 5.9, 'Flink — Distributed Snapshots (Chandy-Lamport)',
              fontsize=FS, fontweight='bold')

    # Pipeline strip: operator boxes separated by boxed '|B|' markers.
    # ``cursor`` is the x position where the next element starts.
    marker_frame = dict(boxstyle='round,pad=0.1', facecolor=GRAY5,
                        edgecolor=LN)
    cursor = 1.0
    for step in ('source', '|B|', 'map()', '|B|', 'sink()'):
        if step != '|B|':
            draw_box(axis, cursor, 4.6, 1.5, 0.55, step, fill=GRAY1,
                     fontsize=FS_SMALL, fontweight='bold')
            cursor += 1.8
            continue
        axis.text(cursor + 0.25, 4.85, step, fontsize=FS, ha='center',
                  fontweight='bold', bbox=marker_frame)
        draw_arrow(axis, cursor - 0.1, 4.85, cursor + 0.05, 4.85, lw=1)
        cursor += 0.7

    axis.text(8.5, 5.0, 'barrier → save state\n→ checkpoint (HDFS/S3)',
              fontsize=FS_SMALL, style='italic')

    # --- Kafka Streams panel ---
    draw_box(axis, 0.3, 2.3, 11.0, 1.5, '', fill=GRAY1, rounded=True, lw=1.5)
    axis.text(1.0, 3.5, 'Kafka Streams — Transakcje Kafka',
              fontsize=FS, fontweight='bold')
    kafka_line = ('idempotent producer + begin TX → produce → commit TX → '
                  'consumer offsets w TX')
    axis.text(1.5, 2.85, kafka_line, fontsize=FS_SMALL)

    # --- Spark Streaming panel ---
    draw_box(axis, 0.3, 0.5, 11.0, 1.5, '', fill=GRAY3, rounded=True, lw=1.5)
    axis.text(1.0, 1.7, 'Spark Streaming — Write-Ahead Log (WAL)',
              fontsize=FS, fontweight='bold')
    spark_line = ('WAL + checkpointing micro-batchów + idempotent sinks '
                  '(np. upsert do DB)')
    axis.text(1.5, 1.05, spark_line, fontsize=FS_SMALL)

    save_fig(figure, 'q20_exactly_once.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 16. Late data strategies (DRAS)
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_late_data_strategies():
    """Draw the four late-data handling strategies (mnemonic DRAS).

    The top row shows a closed window and a late event with an arrow between
    them; below it, one row per strategy lists its name box, a short
    description and its trade-off. The figure is passed to ``save_fig`` as
    ``q20_late_data_strategies.png``.

    The diagram uses only the module's gray palette and ``LN``: the module
    docstring declares all diagrams monochrome, so the late-event highlight
    is a darker gray rather than a colour. The "late" arrow runs only across
    the gap between the two top boxes (x 6.0 -> 5.0) so that it does not
    cross the late-event box's own text.
    """
    fig, ax = plt.subplots(figsize=(9, 5.5))
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 7)
    ax.set_aspect('auto')
    ax.axis('off')
    ax.set_title('Late Data — 4 strategie (mnemonik DRAS)',
                 fontsize=FS_TITLE, fontweight='bold', pad=10)

    # Setup: window closed, late event arrives
    draw_box(ax, 0.5, 5.5, 4.5, 1.0, 'Okno [14:00–14:05]\nZAMKNIĘTE o 14:05',
             fill=GRAY2, fontsize=FS, fontweight='bold')
    draw_box(ax, 6.0, 5.5, 4.5, 1.0,
             'Spóźnione zdarzenie\nevent_time=14:00:03\narrives=14:05:30',
             fill=GRAY5, fontsize=FS_SMALL, fontweight='bold')
    # From the late-event box's left edge to the window box's right edge.
    draw_arrow(ax, 6.0, 6.0, 5.0, 6.0, lw=2, color=LN, style='->')
    ax.text(7.5, 5.2, 'LATE!', fontsize=FS_LABEL, ha='center',
            fontweight='bold', color=LN)

    # 4 strategies: (name, description, trade-off, fill colour)
    strategies = [
        ('D — Drop', 'Odrzuć spóźnione', '/dev/null', GRAY4),
        ('R — Recompute', 'Przelicz okno ponownie',
         'poprawne ale kosztowne', GRAY1),
        ('A — Allowed lateness', 'Czekaj dodatkowy czas\n(np. +2 min)',
         'kompromis pamięci', GRAY2),
        ('S — Side output', 'Przekieruj do osobnej\nkolejki',
         'elastyczne, ręczna analiza', GRAY3),
    ]
    for i, (name, desc, tradeoff, color) in enumerate(strategies):
        # Rows are stacked downwards, 1.1 units apart; each box is 0.9 high.
        y = 3.8 - i * 1.1
        draw_box(ax, 0.5, y, 2.5, 0.9, name, fill=color, fontsize=FS,
                 fontweight='bold')
        ax.text(3.3, y + 0.45, desc, fontsize=FS_SMALL, va='center')
        ax.text(8.5, y + 0.45, tradeoff, fontsize=FS_SMALL, va='center',
                style='italic', color='#555')

    save_fig(fig, 'q20_late_data_strategies.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# 17. Decision tree — which platform
|
|||
|
|
# ============================================================
|
|||
|
|
def gen_decision_tree():
    """Draw the platform-selection decision tree.

    The root question (latency < 10 ms?) branches left (TAK) to a second
    question leading to Kafka Streams or Apache Flink, and right (NIE) to a
    question leading to Spark Streaming or Kafka Streams. A boxed rule of
    thumb is printed underneath. The figure is passed to ``save_fig`` as
    ``q20_decision_tree.png``.

    The x axis starts at -0.5 rather than 0 because the left-most leaf is
    placed at x=-0.3 and its incoming arrow ends at x=-0.1. With a lower
    limit of 0 the leaf box is clipped at the axes edge, and the arrow is not
    drawn at all: ``draw_arrow`` uses ``ax.annotate`` with data coordinates,
    which skips annotations whose target point lies outside the axes.
    """
    fig, ax = plt.subplots(figsize=(10, 5.5))
    # Lower x-limit is negative on purpose -- see the docstring.
    ax.set_xlim(-0.5, 12)
    ax.set_ylim(0, 7)
    ax.set_aspect('auto')
    ax.axis('off')
    ax.set_title('Drzewo decyzyjne — wybór platformy',
                 fontsize=FS_TITLE, fontweight='bold', pad=10)

    # Root question
    draw_box(ax, 3.5, 5.5, 4.5, 1.0, 'Latencja < 10ms\nwymagana?',
             fill=GRAY2, fontsize=FS, fontweight='bold')

    # TAK branch
    draw_arrow(ax, 3.5, 5.7, 2.0, 5.0, lw=1.5)
    ax.text(2.3, 5.3, 'TAK', fontsize=FS, fontweight='bold')

    draw_box(ax, 0.3, 3.5, 3.5, 1.0, 'Dane już w Kafce?\nProste transformacje?',
             fill=GRAY1, fontsize=FS, fontweight='bold')

    # TAK → Kafka Streams (left-most leaf; extends to negative x)
    draw_arrow(ax, 0.3, 3.7, -0.1, 3.0, lw=1.5)
    ax.text(0.0, 3.3, 'TAK', fontsize=FS_SMALL, fontweight='bold')
    draw_box(ax, -0.3, 1.8, 2.5, 1.0, 'Kafka\nStreams', fill=GRAY5,
             fontsize=FS_LABEL, fontweight='bold')

    # NIE → Flink
    draw_arrow(ax, 3.8, 3.7, 4.5, 3.0, lw=1.5)
    ax.text(4.0, 3.3, 'NIE\n(złożona logika)', fontsize=FS_SMALL)
    draw_box(ax, 3.0, 1.8, 2.5, 1.0, 'Apache\nFlink', fill=GRAY5,
             fontsize=FS_LABEL, fontweight='bold')

    # NIE branch
    draw_arrow(ax, 8.0, 5.7, 9.5, 5.0, lw=1.5)
    ax.text(8.7, 5.3, 'NIE', fontsize=FS, fontweight='bold')

    draw_box(ax, 7.5, 3.5, 4.2, 1.0, '~100ms–1s OK?\nPotrzeba ML / SQL?',
             fill=GRAY1, fontsize=FS, fontweight='bold')

    # TAK + ML → Spark
    draw_arrow(ax, 9.5, 3.5, 9.5, 3.0, lw=1.5)
    ax.text(10.0, 3.3, 'TAK + ML/SQL', fontsize=FS_SMALL)
    draw_box(ax, 8.0, 1.8, 2.5, 1.0, 'Spark\nStreaming', fill=GRAY5,
             fontsize=FS_LABEL, fontweight='bold')

    # TAK + proste → Kafka Streams too
    draw_arrow(ax, 7.5, 3.7, 6.5, 3.0, lw=1.5)
    ax.text(6.3, 3.3, 'proste + TAK', fontsize=FS_SMALL)
    draw_box(ax, 5.8, 1.8, 2.0, 1.0, 'Kafka\nStreams', fill=GRAY5,
             fontsize=FS, fontweight='bold')

    # Legend
    ax.text(6.0, 0.7, 'Reguła: Kafka Streams = najprostsze (library) | '
            'Flink = najpotężniejszy (true streaming) | Spark = ekosystem ML',
            fontsize=FS, ha='center',
            bbox=dict(boxstyle='round,pad=0.2', facecolor=GRAY4, edgecolor=GRAY5))

    save_fig(fig, 'q20_decision_tree.png')
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ============================================================
|
|||
|
|
# MAIN
|
|||
|
|
# ============================================================
|
|||
|
|
if __name__ == '__main__':
    # Script entry point: run every diagram generator once, in the same
    # order as the original call sequence, with a banner before and after.
    print("Generating ALL PYTANIE 20 diagrams...")
    for make_diagram in (
        gen_batch_vs_streaming,
        gen_window_types,
        gen_event_vs_processing_time,
        gen_tumbling_fraud,
        gen_sliding_sla,
        gen_session_users,
        gen_streaming_ecosystem,
        gen_true_vs_microbatch,
        gen_platform_comparison,
        gen_kafka_streams_arch,
        gen_flink_arch,
        gen_spark_streaming_arch,
        gen_lambda_vs_kappa,
        gen_lambda_kappa_table,
        gen_exactly_once,
        gen_late_data_strategies,
        gen_decision_tree,
    ):
        make_diagram()
    print("\nAll 17 PYTANIE 20 diagrams generated successfully!")
|