mirror of
https://github.com/kuhyx/testsAndMisc-archive.git
synced 2026-07-04 16:03:07 +02:00
- Replace print() with logging module - Add type annotations to all functions - Add Google-style docstrings to all public functions/classes - Extract magic numbers into named constants - Break complex functions into smaller helpers - Use Path.open() instead of open() - Use contextlib.suppress where appropriate
894 lines
25 KiB
Python
Executable File
894 lines
25 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Cinema Day Planner - Maximize movies watched in a day.
|
|
|
|
Supports:
|
|
- Cinema City HTML/PDF schedules (auto-parsed)
|
|
- Manual input format
|
|
|
|
Usage:
|
|
./cinema_planner.py schedule.html # Parse Cinema City HTML
|
|
./cinema_planner.py schedule.pdf # Parse Cinema City PDF
|
|
./cinema_planner.py -i # Interactive manual input
|
|
./cinema_planner.py movies.txt # Manual format file
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
from contextlib import suppress
|
|
from dataclasses import dataclass, field
|
|
import importlib
|
|
from io import StringIO
|
|
import logging
|
|
from pathlib import Path
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
from typing import TYPE_CHECKING, TextIO
|
|
|
|
if TYPE_CHECKING:
|
|
import types
|
|
|
|
# Module-level logger used for all progress, warning and error messages.
logger = logging.getLogger(__name__)

# Default genres to exclude (can be overridden with --all-genres)
DEFAULT_EXCLUDED_GENRES = {"horror"}

# Ads duration before movie starts (Cinema City shows ~15 min of ads)
ADS_DURATION = 15

# Constants for validation and parsing
# Fewest comma-separated fields a manual line may have.
_MIN_MANUAL_LINE_PARTS = 3
# A text line must be longer than this many characters to count as a title.
_MIN_TITLE_LENGTH = 3
# Duration (minutes) assumed when the extracted text gives none for a movie.
_DEFAULT_MOVIE_DURATION = 120
# Bounds how far past a title line the duration search looks.
_TITLE_LOOKAHEAD_LINES = 5
# Character width of the separator rules in formatted output.
_SEPARATOR_WIDTH = 60
|
|
|
|
|
|
def _try_import(name: str) -> types.ModuleType | None:
    """Import ``name`` if it is available.

    Args:
        name: Dotted name of the module to import.

    Returns:
        The imported module, or None when the import raises ImportError.
    """
    with suppress(ImportError):
        return importlib.import_module(name)
    return None
|
|
|
|
|
|
# Optional PDF text-extraction backends; each name is None when the
# corresponding package cannot be imported.
_pdfplumber = _try_import("pdfplumber")
_fitz = _try_import("fitz")
|
|
|
|
|
|
@dataclass
class Movie:
    """A movie with screening times and metadata.

    Attributes:
        name: Movie title.
        start_times: Screening start times, in minutes from midnight.
        duration: Length of one screening in minutes.
        genres: Genre labels; empty when the source provides none.
    """

    name: str
    start_times: list[int]
    duration: int
    genres: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class Screening:
    """One showing of a movie, with times in minutes from midnight."""

    movie: str
    start: int  # minutes from midnight
    end: int  # minutes from midnight

    def overlaps(self, other: Screening, buffer: int = 0) -> bool:
        """Tell whether this screening conflicts with ``other``.

        Args:
            other: The screening to compare against.
            buffer: Extra minutes required after a screening ends.

        Returns:
            True unless one screening (plus ``buffer``) ends no later than
            ``ADS_DURATION`` minutes after the other one starts.
        """
        # ADS_DURATION is added to each start as a grace period: the ads at
        # the beginning of the later screening may be missed.
        self_runs_late = self.end + buffer > other.start + ADS_DURATION
        other_runs_late = other.end + buffer > self.start + ADS_DURATION
        return self_runs_late and other_runs_late

    def start_str(self) -> str:
        """Return the start time formatted as HH:MM."""
        hours, minutes = divmod(self.start, 60)
        return f"{hours:02d}:{minutes:02d}"

    def end_str(self) -> str:
        """Return the end time formatted as HH:MM."""
        hours, minutes = divmod(self.end, 60)
        return f"{hours:02d}:{minutes:02d}"
|
|
|
|
|
|
def parse_time(time_str: str) -> int:
    """Parse a time string like '18:20' to minutes from midnight.

    Surrounding whitespace is ignored and '.' is accepted in place of ':'
    (for example '18.20').

    Args:
        time_str: Text that starts with an ``H:MM`` or ``HH:MM`` time.

    Returns:
        Minutes from midnight.

    Raises:
        ValueError: If the text does not start with a time, or the minutes
            part is 60 or more.
    """
    time_str = time_str.strip().replace(".", ":")
    match = re.match(r"(\d{1,2}):(\d{2})", time_str)
    if not match:
        msg = f"Invalid time format: {time_str}"
        raise ValueError(msg)
    hours, minutes = int(match.group(1)), int(match.group(2))
    # Two digits can spell 60-99, which is never a valid minute value;
    # accepting it would silently shift the screening into the next hour.
    if minutes >= 60:
        msg = f"Invalid time (minutes out of range): {time_str}"
        raise ValueError(msg)
    return hours * 60 + minutes
|
|
|
|
|
|
def parse_duration(duration_str: str) -> int:
    """Parse a duration like '1h 46m', '1:46', '106m', '110 min', etc.

    Formats are tried in this order:

    1. Hour and/or minute units: '1h 46m', '1h 46min', '2h', '106m',
       '110 min'.
    2. 'H:MM', e.g. '1:46'.
    3. A bare number of minutes, e.g. '106'.

    Args:
        duration_str: Duration text; case and surrounding whitespace are
            ignored.

    Returns:
        The duration in minutes.

    Raises:
        ValueError: If none of the supported formats matches.
    """
    duration_str = duration_str.strip().lower()

    # Hours and minutes are looked up together so that a combined value such
    # as "1h 46min" yields 106 rather than only its minutes part.
    h_match = re.search(r"(\d+)\s*h", duration_str)
    m_match = re.search(r"(\d+)\s*m", duration_str)
    if h_match or m_match:
        hours = int(h_match.group(1)) if h_match else 0
        minutes = int(m_match.group(1)) if m_match else 0
        return hours * 60 + minutes

    # Try "H:MM" format
    match = re.match(r"(\d+):(\d{2})", duration_str)
    if match:
        return int(match.group(1)) * 60 + int(match.group(2))

    # Try pure minutes
    match = re.match(r"(\d+)", duration_str)
    if match:
        return int(match.group(1))

    msg = f"Invalid duration format: {duration_str}"
    raise ValueError(msg)
|
|
|
|
|
|
def parse_manual_line(line: str) -> Movie | None:
    """Parse a manual format line like 'Movie A, 18:20 or 20:50, 1h 46m'.

    The title may itself contain commas: the first comma-separated field
    after the first one that parses as a list of start times marks the end
    of the title, and everything after it is the duration.

    Args:
        line: One line of manual input.

    Returns:
        The parsed movie, or None for a blank line or a '#' comment line.

    Raises:
        ValueError: If the line has fewer than three comma-separated
            fields, no field parses as start times, or the duration is
            invalid.
    """
    line = line.strip()
    if not line or line.startswith("#"):
        return None

    parts = line.split(",")
    if len(parts) < _MIN_MANUAL_LINE_PARTS:
        msg = f"Invalid line format: {line}"
        raise ValueError(msg)

    errors: list[ValueError] = []
    # Stop one field early so at least one field is left for the duration.
    for times_idx in range(1, len(parts) - 1):
        try:
            start_times = [
                parse_time(time_part)
                for time_part in re.split(
                    r"\s+or\s+",
                    parts[times_idx].strip(),
                    flags=re.IGNORECASE,
                )
            ]
        except ValueError as err:
            # Not a times field; it may be part of a title with commas.
            errors.append(err)
            continue

        movie = ",".join(parts[:times_idx]).strip()
        duration_str = ",".join(parts[times_idx + 1:]).strip()
        return Movie(movie, start_times, parse_duration(duration_str))

    # No field parsed as times: report the problem with the second field,
    # which is where the times are expected for a comma-free title.
    raise errors[0]
|
|
|
|
|
|
def _try_parse_time(time_str: str) -> int | None:
    """Parse ``time_str`` with parse_time, mapping ValueError to None."""
    try:
        minutes = parse_time(time_str)
    except ValueError:
        minutes = None
    return minutes
|
|
|
|
|
|
def _try_parse_manual_line(
    line: str,
    error_stream: TextIO | None = None,
) -> Movie | None:
    """Parse a manual line, reporting failures instead of raising.

    Args:
        line: One line of manual input.
        error_stream: Where to write a ``Warning: ...`` line when parsing
            fails; nothing is written when it is None.

    Returns:
        The parsed movie, or None when the line is skipped or invalid.
    """
    try:
        movie = parse_manual_line(line)
    except ValueError as err:
        movie = None
        if error_stream is not None:
            error_stream.write(f"Warning: {err}\n")
    return movie
|
|
|
|
|
|
def _try_parse_interactive_line(line: str) -> Movie | None:
    """Parse one interactively entered line, logging the outcome.

    Returns:
        The parsed movie, or None when the line is skipped or invalid.
    """
    try:
        movie = parse_manual_line(line)
    except ValueError:
        logger.exception(" Error parsing input")
        movie = None
    else:
        if movie is not None:
            logger.info(" Added: %s", movie.name)
    return movie
|
|
|
|
|
|
def extract_date_from_html(content: str) -> str | None:
    """Return the first ``202x-MM-DD`` date found in ``content``.

    Returns:
        The matched date text, or None when no such date is present.
    """
    found = re.search(r"(202\d-\d{2}-\d{2})", content)
    return found.group(1) if found else None
|
|
|
|
|
|
def parse_cinema_city_html(
    filepath: str,
) -> tuple[list[Movie], str | None]:
    """Parse a Cinema City HTML schedule file.

    Args:
        filepath: Path of the saved HTML page; it is read as UTF-8.

    Returns:
        Tuple of (movies, date). Movies are listed in order of first
        appearance and only the first entry per title is kept. The date
        is whatever extract_date_from_html finds in the page.
    """
    html_text = Path(filepath).read_text(encoding="utf-8")
    schedule_date = extract_date_from_html(html_text)

    # Keyed by title: setdefault keeps the first entry per title and the
    # dict remembers the order in which titles were first added.
    first_by_title: dict[str, Movie] = {}

    # The chunk before the first movie-row marker holds no movie.
    for row in re.split(r'class="row movie-row', html_text)[1:]:
        title_hit = re.search(r'qb-movie-name">([^<]+)<', row)
        length_hit = re.search(r"(\d+)\s*min", row)
        if title_hit is None or length_hit is None:
            continue
        title = title_hit.group(1).strip()

        genre_hit = re.search(r'class="mr-sm"[^>]*>([^<]+)<\s*span', row)
        genres: list[str] = []
        if genre_hit is not None:
            labels = genre_hit.group(1).strip().split(",")
            genres = [label.strip() for label in labels if label.strip()]

        # Prefer the time buttons; fall back to the alternate pattern.
        clock_texts = re.findall(
            r'btn btn-primary btn-lg">\s*(\d{2}:\d{2})\s*<', row
        ) or re.findall(r">\s*(\d{2}:\d{2})\s*\(HTTPS://", row)
        if not clock_texts:
            continue

        # dict.fromkeys removes repeated times while keeping their order.
        start_times = list(dict.fromkeys(map(parse_time, clock_texts)))
        first_by_title.setdefault(
            title,
            Movie(title, start_times, int(length_hit.group(1)), genres),
        )

    return list(first_by_title.values()), schedule_date
|
|
|
|
|
|
def parse_cinema_city_pdf(filepath: str) -> list[Movie]:
    """Parse a Cinema City PDF schedule by extracting its text.

    pdfplumber is used when it was imported successfully; otherwise the
    work is handed to _parse_cinema_city_pdf_basic.
    """
    if _pdfplumber is None:
        return _parse_cinema_city_pdf_basic(filepath)

    with _pdfplumber.open(filepath) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
        # Pages that yield no text are skipped entirely.
        full_text = "".join(text + "\n" for text in page_texts if text)
        return parse_cinema_city_text(full_text)
|
|
|
|
|
|
def _parse_cinema_city_pdf_basic(filepath: str) -> list[Movie]:
    """Basic PDF parsing using PyMuPDF or falling back to subprocess.

    Args:
        filepath: Path of the PDF schedule.

    Returns:
        Movies parsed from the text extracted from the PDF.

    Raises:
        SystemExit: Via _exit_no_pdf_support, when PyMuPDF is unavailable
            and ``pdftotext`` is missing or exits with an error.
    """
    if _fitz is not None:
        doc = _fitz.open(filepath)
        try:
            full_text = "".join(page.get_text() + "\n" for page in doc)
        finally:
            # Release the document even when text extraction fails.
            doc.close()
        return parse_cinema_city_text(full_text)

    pdftotext_path = shutil.which("pdftotext")
    if pdftotext_path is None:
        # Does not return: _exit_no_pdf_support ends with sys.exit(1).
        _exit_no_pdf_support()

    try:
        result = subprocess.run(
            [pdftotext_path, "-layout", filepath, "-"],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as err:
        # Report why the tool failed before printing the generic hints;
        # otherwise an unreadable PDF looks like a missing dependency.
        logger.error(
            "pdftotext failed on %s: %s",
            filepath,
            (err.stderr or "").strip(),
        )
        _exit_no_pdf_support()

    return parse_cinema_city_text(result.stdout)
|
|
|
|
|
|
def _exit_no_pdf_support() -> None:
    """Log how to get PDF support, then exit with status 1."""
    hints = (
        "Install pdfplumber, PyMuPDF, or poppler-utils for PDF support",
        " pip install pdfplumber",
        " pip install pymupdf",
        " pacman -S poppler",
    )
    for hint in hints:
        logger.error(hint)
    sys.exit(1)
|
|
|
|
|
|
def parse_cinema_city_text(text: str) -> list[Movie]:
    """Parse a Cinema City schedule from extracted plain text.

    A line made only of upper-case letters, digits, whitespace and a few
    punctuation marks starts a new movie. Times found on that line and on
    the lines after it belong to that movie until the next title line.

    Args:
        text: Text extracted from the schedule.

    Returns:
        Movies that have at least one start time, in order of appearance.
        A movie with no duration found gets ``_DEFAULT_MOVIE_DURATION``.
    """
    # Patterns for movie titles (all caps, usually), durations and times.
    title_re = re.compile(
        r"^([A-ZĄĆĘŁŃÓŚŹŻ][A-ZĄĆĘŁŃÓŚŹŻ0-9\s:,\.\-\!\?\(\)]+)$"
    )
    minutes_re = re.compile(r"(\d+)\s*min")
    clock_re = re.compile(r"\b(\d{1,2}:\d{2})\b")

    rows = text.split("\n")
    found: list[Movie] = []
    title: str | None = None
    length: int | None = None
    starts: list[int] = []

    def _flush() -> None:
        """Store the movie being collected, if it has any start times."""
        if title and starts:
            found.append(
                Movie(
                    title,
                    list(dict.fromkeys(starts)),
                    length or _DEFAULT_MOVIE_DURATION,
                )
            )

    for idx, raw in enumerate(rows):
        stripped = raw.strip()

        if len(stripped) > _MIN_TITLE_LENGTH and title_re.match(stripped):
            _flush()
            title = stripped.title()
            starts = []

            # The duration is the first "<n> min" in the lines right
            # after the title.
            window = rows[idx + 1:min(idx + _TITLE_LOOKAHEAD_LINES, len(rows))]
            length = next(
                (
                    int(hit.group(1))
                    for hit in map(minutes_re.search, window)
                    if hit
                ),
                None,
            )

        if title:
            parsed = map(_try_parse_time, clock_re.findall(stripped))
            starts.extend(value for value in parsed if value is not None)

    # Save last movie
    _flush()
    return found
|
|
|
|
|
|
def find_best_schedule(
    movies: list[Movie],
    buffer: int,
) -> list[list[Screening]]:
    """Find ALL schedules that maximize the number of movies watched.

    Args:
        movies: Candidate movies; at most one screening per entry is used.
        buffer: Minutes passed to Screening.overlaps as its buffer.

    Returns:
        Every conflict-free schedule of maximum size, each sorted by start
        time. The list is empty when no screening can be scheduled.
    """
    options_per_movie: list[list[Screening]] = [
        [
            Screening(film.name, begin, begin + film.duration)
            for begin in film.start_times
        ]
        for film in movies
    ]
    total = len(options_per_movie)

    # Holds every schedule of the largest size seen so far, so the best
    # size is the length of any entry (0 while the list is empty).
    best: list[list[Screening]] = []

    def _search(depth: int, picked: list[Screening]) -> None:
        nonlocal best
        best_size = len(best[0]) if best else 0

        if depth == total:
            if len(picked) > best_size:
                best = [list(picked)]
            elif picked and len(picked) == best_size:
                best.append(list(picked))
            return

        # Pruning: even taking every remaining movie cannot reach the best.
        if len(picked) + (total - depth) < best_size:
            return

        # Try each screening of the current movie that fits.
        for candidate in options_per_movie[depth]:
            if any(candidate.overlaps(taken, buffer) for taken in picked):
                continue
            picked.append(candidate)
            _search(depth + 1, picked)
            picked.pop()

        # Also try skipping this movie.
        _search(depth + 1, picked)

    _search(0, [])

    return [sorted(plan, key=lambda shown: shown.start) for plan in best]
|
|
|
|
|
|
def _format_single_schedule(
    schedule: list[Screening],
    output: TextIO,
) -> None:
    """Write one schedule to ``output`` as a numbered list.

    Each entry shows the screening's start and end, the movie, its length
    and the start time shifted by ``ADS_DURATION``. A positive gap before
    the next entry is reported as a break.

    Args:
        schedule: Screenings in the order they should be listed.
        output: Stream that receives the text.
    """
    entry_count = len(schedule)
    for position, current in enumerate(schedule, 1):
        hours, mins = divmod(current.end - current.start, 60)
        film_hour, film_minute = divmod(current.start + ADS_DURATION, 60)
        output.write(
            f" {position}. {current.start_str()} - "
            f"{current.end_str()} {current.movie}\n"
        )
        output.write(
            f" Duration: {hours}h {mins}m "
            f"(movie starts ~{film_hour:02d}:{film_minute:02d})\n"
        )
        # Numbering starts at 1, so schedule[position] is the next entry.
        if position < entry_count:
            gap = schedule[position].start - current.end
            if gap > 0:
                output.write(f" [{gap} min break]\n")
        output.write("\n")
|
|
|
|
|
|
def _format_schedules(
    schedules: list[list[Screening]],
    all_movies: list[str],
    date: str | None = None,
    max_display: int = 5,
    *,
    output: TextIO | None = None,
) -> None:
    """Format optimal schedules to the output stream.

    Args:
        schedules: Equally good schedules, as returned by
            find_best_schedule.
        all_movies: Names of all movies that were considered.
        date: Schedule date shown in the heading, if known.
        max_display: Maximum number of schedules to print; values below
            zero are treated as zero.
        output: Stream to write to; defaults to ``sys.stdout``.
    """
    if output is None:
        output = sys.stdout

    if not schedules or not schedules[0]:
        output.write("No movies can be scheduled!\n")
        return

    sep = "=" * _SEPARATOR_WIDTH
    thin_sep = "\u2500" * _SEPARATOR_WIDTH

    num_movies = len(schedules[0])
    num_schedules = len(schedules)

    output.write(f"\n{sep}\n")
    if date:
        output.write(f" OPTIMAL CINEMA SCHEDULES - {date}\n")
    else:
        output.write(" OPTIMAL CINEMA SCHEDULES\n")
    output.write(
        f" {num_movies} movies, "
        f"{num_schedules} possible combination(s)\n"
    )
    output.write(f"{sep}\n\n")

    # Clamp at zero: a negative count would make the slice below drop
    # schedules from the end instead of limiting how many are shown.
    display_count = max(0, min(num_schedules, max_display))
    for idx, schedule in enumerate(schedules[:display_count], 1):
        if num_schedules > 1:
            output.write(f"{thin_sep}\n")
            output.write(f" OPTION {idx}:\n")
            output.write(f"{thin_sep}\n\n")
        _format_single_schedule(schedule, output)

    if num_schedules > display_count:
        output.write(f"{thin_sep}\n")
        output.write(
            f" ... and {num_schedules - display_count} "
            "more combinations\n"
        )
        output.write(" (use -n to show more, e.g., -n 10)\n")
        output.write("\n")

    # Show skipped movies (from first schedule as reference); only done
    # when there is exactly one schedule.
    scheduled_movies = {s.movie for s in schedules[0]}
    skipped = [m for m in all_movies if m not in scheduled_movies]
    if skipped and num_schedules == 1:
        output.write(f"{thin_sep}\n")
        output.write(f" Skipped movies ({len(skipped)}):\n")
        for movie in skipped:
            output.write(f" - {movie}\n")
        output.write("\n")
|
|
|
|
|
|
def _format_all_movies(
    movies: list[Movie],
    date: str | None = None,
    *,
    output: TextIO | None = None,
) -> None:
    """Write a listing of all parsed movies.

    Args:
        movies: Movies to list, in the order given.
        date: Schedule date mentioned in the heading, if known.
        output: Stream to write to; defaults to ``sys.stdout``.
    """
    stream = sys.stdout if output is None else output
    rule = "\u2500" * _SEPARATOR_WIDTH
    date_suffix = f" for {date}" if date else ""

    stream.write(f"\n{rule}\n")
    stream.write(f" Parsed {len(movies)} movies{date_suffix}:\n")
    stream.write(f"{rule}\n")

    for movie in movies:
        clock_times = [
            f"{hour:02d}:{minute:02d}"
            for hour, minute in (
                divmod(start, 60) for start in sorted(movie.start_times)
            )
        ]
        genre_str = ""
        if movie.genres:
            genre_str = f" [{', '.join(movie.genres)}]"
        stream.write(f" {movie.name} ({movie.duration} min){genre_str}\n")
        stream.write(f" Times: {', '.join(clock_times)}\n")
    stream.write("\n")
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the cinema planner.

    Returns:
        A parser with one optional positional ``input_file`` and the
        planner's options.
    """
    cli = argparse.ArgumentParser(
        description="Plan your cinema day to watch as many movies as possible.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Supports Cinema City HTML/PDF schedules (auto-detected).

Manual input format (one movie per line):
Movie Title, start_time1 [or start_time2 ...], duration

Example:
Inception, 10:30 or 14:00 or 18:30, 2h 28m
The Matrix, 12:00 or 16:45, 2h 16m
""",
    )
    cli.add_argument("input_file", nargs="?", help="Input file (HTML/PDF/TXT)")

    # (flags, add_argument keywords), registered in this order.
    option_specs = [
        (
            ("-b", "--buffer"),
            {
                "type": int,
                "default": 0,
                "help": "Buffer time between movies in minutes (default: 0)",
            },
        ),
        (
            ("-i", "--interactive"),
            {
                "action": "store_true",
                "help": "Interactive mode - enter movies one by one",
            },
        ),
        (
            ("-l", "--list"),
            {
                "action": "store_true",
                "help": "List all parsed movies without scheduling",
            },
        ),
        (
            ("-s", "--select"),
            {
                "type": str,
                "help": "Comma-separated movie names to include (partial match)",
            },
        ),
        (
            ("-x", "--exclude"),
            {
                "type": str,
                "help": "Comma-separated movie names to exclude (partial match)",
            },
        ),
        (
            ("-g", "--exclude-genre"),
            {
                "type": str,
                "help": "Comma-separated genres to exclude (e.g., 'Horror')",
            },
        ),
        (
            ("--all-genres",),
            {
                "action": "store_true",
                "help": "Include all genres (disable default Horror exclusion)",
            },
        ),
        (
            ("-o", "--output"),
            {
                "type": str,
                "help": "Save schedule to file (default: cinema_plan_DATE.txt)",
            },
        ),
        (
            ("-n", "--max-schedules"),
            {
                "type": int,
                "default": 5,
                "help": "Max schedule options to display (default: 5)",
            },
        ),
        (
            ("-m", "--must-watch"),
            {
                "type": str,
                "help": "Only show schedules containing this movie (partial match)",
            },
        ),
    ]
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli
|
|
|
|
|
|
def _load_movies_interactive() -> list[Movie]:
    """Read movies typed at a ``> `` prompt.

    Input ends at the first blank line or at end-of-file. Lines that fail
    to parse are skipped.

    Returns:
        The movies entered, in input order.
    """
    instructions = (
        "Enter movies (empty line to finish):",
        "Format: Title, start1 [or start2 ...], duration",
        "Example: Inception, 10:30 or 14:00, 2h 28m",
        "",
    )
    for text in instructions:
        logger.info(text)

    collected: list[Movie] = []
    # End-of-file ends the input the same way a blank line does.
    with suppress(EOFError):
        while True:
            entry = input("> ")
            if not entry.strip():
                break
            movie = _try_parse_interactive_line(entry)
            if movie is not None:
                collected.append(movie)
    return collected
|
|
|
|
|
|
def _load_movies_from_file(
    filepath: Path,
) -> tuple[list[Movie], str | None]:
    """Load movies from a file (HTML, PDF, or manual format).

    The parser is chosen by file suffix: ``.html``/``.htm`` and ``.pdf``
    use the Cinema City parsers, anything else is read as manual format
    with one movie per line.

    Args:
        filepath: File to read.

    Returns:
        Tuple of (movies, date). The date is only ever set for HTML input.
    """
    suffix = filepath.suffix.lower()
    logger.info("Parsing: %s", filepath)

    if suffix in {".html", ".htm"}:
        return parse_cinema_city_html(str(filepath))

    if suffix == ".pdf":
        return parse_cinema_city_pdf(str(filepath)), None

    # Read as UTF-8, like the HTML schedule, so titles with non-ASCII
    # letters do not depend on the platform's default encoding.
    with filepath.open(encoding="utf-8") as f:
        parsed = [_try_parse_manual_line(line, sys.stderr) for line in f]
    movies = [movie for movie in parsed if movie is not None]
    return movies, None
|
|
|
|
|
|
def _load_movies_from_stdin() -> list[Movie]:
    """Read manual-format movies from standard input until end-of-file.

    Parse problems are reported on ``sys.stderr`` and the line is skipped.
    """
    logger.info("Enter movies (Ctrl+D when done):")
    parsed = (_try_parse_manual_line(line, sys.stderr) for line in sys.stdin)
    return [movie for movie in parsed if movie is not None]
|
|
|
|
|
|
def _split_terms(raw: str) -> list[str]:
    """Split a comma-separated option value into lowercase, non-empty terms."""
    return [
        term for term in (part.strip().lower() for part in raw.split(",")) if term
    ]


def _filter_movies(
    movies: list[Movie],
    args: argparse.Namespace,
) -> tuple[list[Movie], set[str]]:
    """Apply name and genre filters to movies.

    Name terms are matched case-insensitively as substrings of the title.
    Empty terms (for example from a trailing comma) are ignored, because
    an empty string is a substring of every title.

    Args:
        movies: Movies to filter.
        args: Parsed options; reads ``select``, ``exclude``,
            ``all_genres`` and ``exclude_genre``.

    Returns:
        Tuple of (remaining movies, lowercase genres that were excluded).
    """
    if args.select:
        select_terms = _split_terms(args.select)
        if select_terms:
            movies = [
                m
                for m in movies
                if any(t in m.name.lower() for t in select_terms)
            ]
        logger.info(
            "Selected %d movies matching: %s",
            len(movies),
            args.select,
        )

    if args.exclude:
        exclude_terms = _split_terms(args.exclude)
        movies = [
            m
            for m in movies
            if not any(t in m.name.lower() for t in exclude_terms)
        ]
        logger.info("After name exclusion: %d movies", len(movies))

    # --all-genres only drops the default exclusions; genres named with
    # --exclude-genre are always excluded.
    excluded_genres: set[str] = set()
    if not args.all_genres:
        excluded_genres.update(DEFAULT_EXCLUDED_GENRES)
    if args.exclude_genre:
        excluded_genres.update(_split_terms(args.exclude_genre))

    if excluded_genres:
        before_count = len(movies)
        movies = [
            m
            for m in movies
            if not any(g.lower() in excluded_genres for g in m.genres)
        ]
        filtered_count = before_count - len(movies)
        if filtered_count > 0:
            logger.info(
                "Excluded %d movies by genre: %s",
                filtered_count,
                ", ".join(sorted(excluded_genres)),
            )

    return movies, excluded_genres
|
|
|
|
|
|
def _apply_must_watch_filter(
    schedules: list[list[Screening]],
    must_watch: str,
) -> list[list[Screening]]:
    """Keep only the schedules that include the must-watch movie.

    Args:
        schedules: Candidate schedules.
        must_watch: Text matched case-insensitively as a substring of the
            movie names.

    Returns:
        The matching schedules, or ``schedules`` unchanged (after a
        warning) when none of them matches.
    """
    needle = must_watch.lower()

    def _has_match(schedule: list[Screening]) -> bool:
        return any(needle in shown.movie.lower() for shown in schedule)

    matching = list(filter(_has_match, schedules))
    if not matching:
        logger.warning("No optimal schedules contain '%s'", must_watch)
        logger.warning("Showing all schedules instead.")
        return schedules

    logger.info(
        "Filtered to %d schedules containing '%s'",
        len(matching),
        must_watch,
    )
    return matching
|
|
|
|
|
|
def _output_schedules(
    schedules: list[list[Screening]],
    all_movie_names: list[str],
    schedule_date: str | None,
    args: argparse.Namespace,
    excluded_genres: set[str],
) -> None:
    """Handle schedule output, optionally saving to file.

    The formatted schedules are always written to ``sys.stdout``. They are
    also saved, below a short header, when ``args.output`` is set or a
    schedule date is known; in the latter case the file is named
    ``cinema_plan_<date>.txt``.

    Args:
        schedules: Schedules to print.
        all_movie_names: Names of all movies that were considered.
        schedule_date: Date of the schedule, if known.
        args: Parsed options; reads ``max_schedules``, ``output`` and
            ``buffer``.
        excluded_genres: Genres listed in the saved file's header.
    """
    output_buffer = StringIO()
    _format_schedules(
        schedules,
        all_movie_names,
        schedule_date,
        args.max_schedules,
        output=output_buffer,
    )
    schedule_output = output_buffer.getvalue()
    sys.stdout.write(schedule_output)

    if args.output or schedule_date:
        output_file = (
            Path(args.output)
            if args.output
            else Path(f"cinema_plan_{schedule_date}.txt")
        )
        # The report contains non-ASCII text (box-drawing separators and
        # possibly movie titles), so do not rely on the locale encoding.
        with output_file.open("w", encoding="utf-8") as f:
            f.write(f"Generated: {schedule_date or 'unknown date'}\n")
            f.write(f"Movies considered: {len(all_movie_names)}\n")
            f.write(f"Buffer time: {args.buffer} minutes\n")
            if excluded_genres:
                f.write(
                    "Excluded genres: "
                    f"{', '.join(sorted(excluded_genres))}\n"
                )
            f.write(schedule_output)
        logger.info("Schedule saved to: %s", output_file)
|
|
|
|
|
|
def main() -> None:
    """Run the cinema day planner command-line interface.

    Loads movies (interactive prompt, input file, or standard input),
    applies the filters, then either lists the movies or computes and
    outputs the best schedules. Exits with status 1 when no movies load.
    """
    logging.basicConfig(format="%(message)s", level=logging.INFO)
    args = _build_parser().parse_args()

    # Only file input can provide a schedule date.
    schedule_date: str | None = None
    if args.interactive:
        movies = _load_movies_interactive()
    elif args.input_file:
        source = Path(args.input_file)
        movies, schedule_date = _load_movies_from_file(source)
    else:
        movies = _load_movies_from_stdin()

    if not movies:
        logger.error("No movies found!")
        sys.exit(1)

    movies, excluded_genres = _filter_movies(movies, args)

    if args.list:
        _format_all_movies(movies, schedule_date)
        return

    logger.info("\nOptimizing schedule for %d movies...", len(movies))
    logger.info("Buffer time between movies: %d minutes", args.buffer)

    schedules = find_best_schedule(movies, args.buffer)
    if args.must_watch:
        schedules = _apply_must_watch_filter(schedules, args.must_watch)

    _output_schedules(
        schedules,
        [movie.name for movie in movies],
        schedule_date,
        args,
        excluded_genres,
    )
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|