From dd8763f30ff01363435a27e25eae24802849c155 Mon Sep 17 00:00:00 2001 From: Krzysztof Rudnicki Date: Sat, 6 Sep 2025 14:00:34 +0200 Subject: [PATCH] feat: added script to extract links from html --- C/imageViewer/install_arch.sh | 59 +++- C/imageViewer/main.c | 414 ++++++++++++++++++++++++- PYTHON/.gitignore | 216 +++++++++++++ PYTHON/extractLinks/main.py | 90 ++++++ PYTHON/extractLinks/pytest.ini | 2 + PYTHON/extractLinks/run.sh | 14 + PYTHON/extractLinks/tests/sample2.html | 15 + PYTHON/extractLinks/tests/test_main.py | 70 +++++ 8 files changed, 849 insertions(+), 31 deletions(-) create mode 100644 PYTHON/.gitignore create mode 100644 PYTHON/extractLinks/main.py create mode 100644 PYTHON/extractLinks/pytest.ini create mode 100755 PYTHON/extractLinks/run.sh create mode 100644 PYTHON/extractLinks/tests/sample2.html create mode 100644 PYTHON/extractLinks/tests/test_main.py diff --git a/C/imageViewer/install_arch.sh b/C/imageViewer/install_arch.sh index c3649aa..f983792 100755 --- a/C/imageViewer/install_arch.sh +++ b/C/imageViewer/install_arch.sh @@ -50,38 +50,63 @@ check_permissions() { install_dependencies() { print_step "Checking dependencies..." - + # Check if pacman is available if ! command -v pacman &> /dev/null; then print_error "pacman not found. Are you sure this is Arch Linux?" exit 1 fi - - # Check if required packages are already installed + + # Define required packages and show what we will check local packages=("sdl2" "sdl2_image" "gcc" "make" "pkg-config" "xdg-utils") + print_step "Packages to verify: ${packages[*]}" + + # Determine missing packages in a single call (faster than looping) + # pacman -T lists targets that are not currently installed + local missing_output + missing_output=$(pacman -T "${packages[@]}" 2>/dev/null || true) + + # Build arrays for missing and installed for better UX local missing_packages=() - - for package in "${packages[@]}"; do - if ! 
pacman -Q "$package" &> /dev/null; then - missing_packages+=("$package") + local installed_packages=() + if [[ -n "$missing_output" ]]; then + # Populate missing_packages from pacman -T output + while IFS= read -r line; do + [[ -n "$line" ]] && missing_packages+=("$line") + done <<< "$missing_output" + fi + + # Derive installed packages by set subtraction + for pkg in "${packages[@]}"; do + local found_missing=0 + for miss in "${missing_packages[@]}"; do + if [[ "$pkg" == "$miss" ]]; then + found_missing=1 + break + fi + done + if [[ $found_missing -eq 0 ]]; then + installed_packages+=("$pkg") fi done - - if [ ${#missing_packages[@]} -eq 0 ]; then + + # Verbose summary + if [[ ${#installed_packages[@]} -gt 0 ]]; then + print_success "Already installed: ${installed_packages[*]}" + fi + if [[ ${#missing_packages[@]} -eq 0 ]]; then print_success "All dependencies are already installed" return 0 + else + print_warning "Missing packages: ${missing_packages[*]}" fi - - print_step "Installing missing dependencies: ${missing_packages[*]}" - - # Update package database + print_step "Updating package database..." sudo pacman -Sy - - # Install required packages - print_step "Installing SDL2 libraries..." + + print_step "Installing missing dependencies..." 
sudo pacman -S --needed "${missing_packages[@]}" - + print_success "Dependencies installed successfully" } diff --git a/C/imageViewer/main.c b/C/imageViewer/main.c index 898799f..e0d6106 100644 --- a/C/imageViewer/main.c +++ b/C/imageViewer/main.c @@ -30,9 +30,15 @@ typedef struct { SDL_Window *window; SDL_Renderer *renderer; SDL_Texture *texture; + SDL_Surface *original_surface; // kept for saving rotated output char current_file[MAX_PATH_LEN]; int image_width; int image_height; + // Trimming (crop) amounts in pixels from each side (applied before rotation) + int trim_left; + int trim_right; + int trim_top; + int trim_bottom; float zoom_factor; int offset_x; int offset_y; @@ -46,6 +52,9 @@ typedef struct { int right_key_held; Uint32 last_auto_nav_time; Uint32 auto_nav_interval; // milliseconds + + // Rotation state (degrees, multiples of 90) + int rotation_degrees; } ImageViewer; // Function declarations @@ -59,6 +68,12 @@ static int navigate_prev_image(ImageViewer *viewer); static void print_current_image_info(const ImageViewer *viewer); static void handle_auto_navigation(ImageViewer *viewer); +// Rotation/saving helpers +static SDL_Surface *rotate_surface_90_cw(SDL_Surface *src); +static SDL_Surface *rotate_surface_quarters(SDL_Surface *src, int quartersCW); +static SDL_Surface *crop_surface_argb8888(SDL_Surface *src, int left, int top, int right, int bottom); +static int save_processed_image(const ImageViewer *viewer); + // Safe memory copy wrapper to address static analyzer warnings static int safe_copy_memory(void *dest, size_t dest_size, const void *src, size_t src_len) { if (!dest || !src || dest_size == 0 || src_len == 0) { @@ -146,13 +161,19 @@ static int init_viewer(ImageViewer *viewer) { } viewer->texture = NULL; + viewer->original_surface = NULL; viewer->current_file[0] = '\0'; viewer->zoom_factor = 1.0f; + viewer->trim_left = 0; + viewer->trim_right = 0; + viewer->trim_top = 0; + viewer->trim_bottom = 0; viewer->offset_x = 0; viewer->offset_y = 0; 
viewer->dragging = 0; viewer->image_width = 0; viewer->image_height = 0; + viewer->rotation_degrees = 0; // Initialize file list viewer->file_list.files = NULL; @@ -174,6 +195,10 @@ static int load_image(ImageViewer *viewer, const char *filename) { SDL_DestroyTexture(viewer->texture); viewer->texture = NULL; } + if (viewer->original_surface) { + SDL_FreeSurface(viewer->original_surface); + viewer->original_surface = NULL; + } SDL_Surface *surface = IMG_Load(filename); if (!surface) { @@ -181,15 +206,27 @@ static int load_image(ImageViewer *viewer, const char *filename) { return 0; } - viewer->texture = SDL_CreateTextureFromSurface(viewer->renderer, surface); - if (!viewer->texture) { - printf("Unable to create texture from %s! SDL_Error: %s\n", filename, SDL_GetError()); + // Convert to a known format for safe rotation/saving + SDL_Surface *converted = SDL_ConvertSurfaceFormat(surface, SDL_PIXELFORMAT_ARGB8888, 0); + if (!converted) { + printf("Unable to convert surface for %s! SDL_Error: %s\n", filename, SDL_GetError()); SDL_FreeSurface(surface); return 0; } - viewer->image_width = surface->w; - viewer->image_height = surface->h; + viewer->texture = SDL_CreateTextureFromSurface(viewer->renderer, converted); + if (!viewer->texture) { + printf("Unable to create texture from %s! 
SDL_Error: %s\n", filename, SDL_GetError()); + SDL_FreeSurface(converted); + SDL_FreeSurface(surface); + return 0; + } + + viewer->image_width = converted->w; + viewer->image_height = converted->h; + + // Keep the converted surface for saving later + viewer->original_surface = converted; SDL_FreeSurface(surface); @@ -200,8 +237,14 @@ static int load_image(ImageViewer *viewer, const char *filename) { } viewer->zoom_factor = 1.0f; + // Reset trims on new image + viewer->trim_left = 0; + viewer->trim_right = 0; + viewer->trim_top = 0; + viewer->trim_bottom = 0; viewer->offset_x = 0; viewer->offset_y = 0; + viewer->rotation_degrees = 0; // reset rotation on new image int window_w, window_h; SDL_GetWindowSize(viewer->window, &window_w, &window_h); @@ -228,8 +271,32 @@ static void render_image(ImageViewer *viewer) { return; } - int scaled_width = (int)(viewer->image_width * viewer->zoom_factor); - int scaled_height = (int)(viewer->image_height * viewer->zoom_factor); + int base_w = viewer->image_width; + int base_h = viewer->image_height; + + // Compute effective source rect based on trims (clamp to valid range) + int left = viewer->trim_left < 0 ? 0 : viewer->trim_left; + int right = viewer->trim_right < 0 ? 0 : viewer->trim_right; + int top = viewer->trim_top < 0 ? 0 : viewer->trim_top; + int bottom = viewer->trim_bottom < 0 ? 
0 : viewer->trim_bottom; + if (left + right >= base_w) { + int excess = left + right - (base_w - 1); + if (right >= excess) right -= excess; else left -= (excess - right); + } + if (top + bottom >= base_h) { + int excess = top + bottom - (base_h - 1); + if (bottom >= excess) bottom -= excess; else top -= (excess - bottom); + } + SDL_Rect src_rect; + src_rect.x = left; + src_rect.y = top; + src_rect.w = base_w - left - right; + src_rect.h = base_h - top - bottom; + if (src_rect.w <= 0) src_rect.w = 1; + if (src_rect.h <= 0) src_rect.h = 1; + + int scaled_width = (int)(src_rect.w * viewer->zoom_factor); + int scaled_height = (int)(src_rect.h * viewer->zoom_factor); int window_w, window_h; SDL_GetWindowSize(viewer->window, &window_w, &window_h); @@ -238,7 +305,7 @@ static void render_image(ImageViewer *viewer) { int y = (window_h - scaled_height) / 2 + viewer->offset_y; SDL_Rect dest_rect = {x, y, scaled_width, scaled_height}; - SDL_RenderCopy(viewer->renderer, viewer->texture, NULL, &dest_rect); + SDL_RenderCopyEx(viewer->renderer, viewer->texture, &src_rect, &dest_rect, (double)viewer->rotation_degrees, NULL, SDL_FLIP_NONE); SDL_RenderPresent(viewer->renderer); } @@ -272,6 +339,11 @@ static void print_help() { printf("Mouse drag: Pan image\n"); printf("Left/Right Arrow: Navigate between images\n"); printf("Hold Left/Right Arrow: Auto-navigate every second\n"); + printf("[ / ]: Rotate left/right by 90 degrees\n"); + printf("Trim (per side, step 10px; hold Shift for 50px):\n"); + printf(" 1/2: Left -/+ 3/4: Right -/+ 5/6: Top -/+ 7/8: Bottom -/+\n"); + printf(" T: Reset all trims to 0\n"); + printf("Ctrl+S: Save trimmed (and rotated, if applied) image next to the original\n"); printf("R: Reset zoom and position\n"); printf("F: Fit image to window\n"); printf("H: Show this help\n"); @@ -283,6 +355,10 @@ static void cleanup_viewer(ImageViewer *viewer) { if (viewer->texture) { SDL_DestroyTexture(viewer->texture); } + if (viewer->original_surface) { + 
SDL_FreeSurface(viewer->original_surface); + viewer->original_surface = NULL; + } if (viewer->renderer) { SDL_DestroyRenderer(viewer->renderer); } @@ -730,9 +806,12 @@ int main(int argc, char *argv[]) { case SDLK_f: { int window_w, window_h; SDL_GetWindowSize(viewer.window, &window_w, &window_h); - - float scale_x = (float)window_w / viewer.image_width; - float scale_y = (float)window_h / viewer.image_height; + int eff_w = viewer.image_width - viewer.trim_left - viewer.trim_right; + int eff_h = viewer.image_height - viewer.trim_top - viewer.trim_bottom; + if (eff_w < 1) eff_w = 1; + if (eff_h < 1) eff_h = 1; + float scale_x = (float)window_w / eff_w; + float scale_y = (float)window_h / eff_h; viewer.zoom_factor = (scale_x < scale_y) ? scale_x : scale_y; viewer.offset_x = 0; viewer.offset_y = 0; @@ -754,6 +833,95 @@ int main(int argc, char *argv[]) { print_help(); break; + // Trimming controls: per-side -/+ with number keys; Shift = larger step + case SDLK_1: + case SDLK_2: + case SDLK_3: + case SDLK_4: + case SDLK_5: + case SDLK_6: + case SDLK_7: + case SDLK_8: { + int step = (SDL_GetModState() & KMOD_SHIFT) ? 
50 : 10; + int iw = viewer.image_width; + int ih = viewer.image_height; + if (iw <= 0 || ih <= 0) break; + switch (e.key.keysym.sym) { + case SDLK_1: // left - + viewer.trim_left -= step; + if (viewer.trim_left < 0) viewer.trim_left = 0; + break; + case SDLK_2: // left + + viewer.trim_left += step; + break; + case SDLK_3: // right - + viewer.trim_right -= step; + if (viewer.trim_right < 0) viewer.trim_right = 0; + break; + case SDLK_4: // right + + viewer.trim_right += step; + break; + case SDLK_5: // top - + viewer.trim_top -= step; + if (viewer.trim_top < 0) viewer.trim_top = 0; + break; + case SDLK_6: // top + + viewer.trim_top += step; + break; + case SDLK_7: // bottom - + viewer.trim_bottom -= step; + if (viewer.trim_bottom < 0) viewer.trim_bottom = 0; + break; + case SDLK_8: // bottom + + viewer.trim_bottom += step; + break; + } + // Clamp so at least 1px remains + if (viewer.trim_left + viewer.trim_right >= iw) { + viewer.trim_right = iw - 1 - viewer.trim_left; + if (viewer.trim_right < 0) viewer.trim_right = 0; + if (viewer.trim_left >= iw) viewer.trim_left = iw - 1; + } + if (viewer.trim_top + viewer.trim_bottom >= ih) { + viewer.trim_bottom = ih - 1 - viewer.trim_top; + if (viewer.trim_bottom < 0) viewer.trim_bottom = 0; + if (viewer.trim_top >= ih) viewer.trim_top = ih - 1; + } + int eff_w = iw - viewer.trim_left - viewer.trim_right; + int eff_h = ih - viewer.trim_top - viewer.trim_bottom; + printf("Trim L/R/T/B: %d/%d/%d/%d (effective %dx%d)\n", + viewer.trim_left, viewer.trim_right, viewer.trim_top, viewer.trim_bottom, + eff_w, eff_h); + } break; + + case SDLK_t: // reset trimming + viewer.trim_left = viewer.trim_right = viewer.trim_top = viewer.trim_bottom = 0; + printf("Trims reset.\n"); + break; + + case SDLK_LEFTBRACKET: { // '[' rotate left 90 + viewer.rotation_degrees -= 90; + if (viewer.rotation_degrees <= -360) + viewer.rotation_degrees = 0; + printf("Rotation: %d degrees\n", ((viewer.rotation_degrees%360)+360)%360); + } break; + + case 
SDLK_RIGHTBRACKET: { // ']' rotate right 90 + viewer.rotation_degrees += 90; + if (viewer.rotation_degrees >= 360) + viewer.rotation_degrees = 0; + printf("Rotation: %d degrees\n", ((viewer.rotation_degrees%360)+360)%360); + } break; + + case SDLK_s: { + const Uint16 mods = SDL_GetModState(); + if (mods & KMOD_CTRL) { + if (!save_processed_image(&viewer)) { + printf("Failed to save image.\n"); + } + } + } break; + case SDLK_LEFT: if (!viewer.left_key_held) { // First press - immediate navigation @@ -832,9 +1000,12 @@ int main(int argc, char *argv[]) { // Recalculate auto-scaling for new window size int window_w = e.window.data1; int window_h = e.window.data2; - - float scale_x = (float)window_w / viewer.image_width; - float scale_y = (float)window_h / viewer.image_height; + int eff_w = viewer.image_width - viewer.trim_left - viewer.trim_right; + int eff_h = viewer.image_height - viewer.trim_top - viewer.trim_bottom; + if (eff_w < 1) eff_w = 1; + if (eff_h < 1) eff_h = 1; + float scale_x = (float)window_w / eff_w; + float scale_y = (float)window_h / eff_h; float auto_scale = (scale_x < scale_y) ? 
scale_x : scale_y; // Only scale down if image is larger than window, never scale up @@ -864,4 +1035,219 @@ int main(int argc, char *argv[]) { cleanup_viewer(&viewer); printf("Image viewer closed.\n"); return 0; +} + +// Rotate ARGB8888 surface 90 degrees clockwise +static SDL_Surface *rotate_surface_90_cw(SDL_Surface *src) { + if (!src) return NULL; + int allocated_conv = 0; + SDL_Surface *work = src; + if (src->format->format != SDL_PIXELFORMAT_ARGB8888) { + work = SDL_ConvertSurfaceFormat(src, SDL_PIXELFORMAT_ARGB8888, 0); + if (!work) return NULL; + allocated_conv = 1; + } + + int src_w = work->w; + int src_h = work->h; + SDL_Surface *dest = SDL_CreateRGBSurfaceWithFormat(0, src_h, src_w, 32, SDL_PIXELFORMAT_ARGB8888); + if (!dest) { + if (allocated_conv) SDL_FreeSurface(work); + return NULL; + } + + if (SDL_MUSTLOCK(work)) SDL_LockSurface(work); + if (SDL_MUSTLOCK(dest)) SDL_LockSurface(dest); + + Uint32 *src_pixels = (Uint32 *)work->pixels; + Uint32 *dst_pixels = (Uint32 *)dest->pixels; + int src_pitch_px = work->pitch / 4; + int dst_pitch_px = dest->pitch / 4; + + for (int y = 0; y < src_h; ++y) { + for (int x = 0; x < src_w; ++x) { + Uint32 pixel = src_pixels[y * src_pitch_px + x]; + int nx = src_h - 1 - y; + int ny = x; + dst_pixels[ny * dst_pitch_px + nx] = pixel; + } + } + + if (SDL_MUSTLOCK(dest)) SDL_UnlockSurface(dest); + if (SDL_MUSTLOCK(work)) SDL_UnlockSurface(work); + if (allocated_conv) SDL_FreeSurface(work); + return dest; +} + +static SDL_Surface *rotate_surface_quarters(SDL_Surface *src, int quartersCW) { + quartersCW = ((quartersCW % 4) + 4) % 4; + if (quartersCW == 0) { + // Return a duplicate to avoid accidental modifications to original + SDL_Surface *dup = SDL_ConvertSurfaceFormat(src, SDL_PIXELFORMAT_ARGB8888, 0); + return dup; + } + + SDL_Surface *current = SDL_ConvertSurfaceFormat(src, SDL_PIXELFORMAT_ARGB8888, 0); + if (!current) return NULL; + for (int i = 0; i < quartersCW; ++i) { + SDL_Surface *next = rotate_surface_90_cw(current); 
+ SDL_FreeSurface(current); + if (!next) return NULL; + current = next; + } + return current; +} + +// Crop ARGB8888 surface by trimming pixels from each side; returns new surface +static SDL_Surface *crop_surface_argb8888(SDL_Surface *src, int left, int top, int right, int bottom) { + if (!src) return NULL; + SDL_Surface *work = src; + int free_work = 0; + if (src->format->format != SDL_PIXELFORMAT_ARGB8888) { + work = SDL_ConvertSurfaceFormat(src, SDL_PIXELFORMAT_ARGB8888, 0); + if (!work) return NULL; + free_work = 1; + } + + int iw = work->w; + int ih = work->h; + if (left < 0) left = 0; + if (right < 0) right = 0; + if (top < 0) top = 0; + if (bottom < 0) bottom = 0; + if (left + right >= iw) right = iw - 1 - left; + if (top + bottom >= ih) bottom = ih - 1 - top; + int cw = iw - left - right; + int ch = ih - top - bottom; + if (cw < 1) cw = 1; + if (ch < 1) ch = 1; + + SDL_Surface *out = SDL_CreateRGBSurfaceWithFormat(0, cw, ch, 32, SDL_PIXELFORMAT_ARGB8888); + if (!out) { + if (free_work) SDL_FreeSurface(work); + return NULL; + } + if (SDL_MUSTLOCK(work)) SDL_LockSurface(work); + if (SDL_MUSTLOCK(out)) SDL_LockSurface(out); + Uint32 *sp = (Uint32 *)work->pixels; + Uint32 *dp = (Uint32 *)out->pixels; + int sp_pitch = work->pitch / 4; + int dp_pitch = out->pitch / 4; + for (int y = 0; y < ch; ++y) { + memcpy(&dp[y * dp_pitch], &sp[(y + top) * sp_pitch + left], (size_t)cw * 4); + } + if (SDL_MUSTLOCK(out)) SDL_UnlockSurface(out); + if (SDL_MUSTLOCK(work)) SDL_UnlockSurface(work); + if (free_work) SDL_FreeSurface(work); + return out; +} + +static int save_processed_image(const ImageViewer *viewer) { + if (!viewer->original_surface) { + printf("No image loaded to save.\n"); + return 0; + } + + // First, crop based on current trims (before rotation to match on-screen behavior) + SDL_Surface *cropped = crop_surface_argb8888( + viewer->original_surface, + viewer->trim_left, viewer->trim_top, viewer->trim_right, viewer->trim_bottom); + if (!cropped) { + printf("Failed 
to crop surface for saving.\n"); + return 0; + } + + int rot = ((viewer->rotation_degrees % 360) + 360) % 360; + int quarters = rot / 90; + + SDL_Surface *save_surf = NULL; + if (quarters == 0) { + save_surf = cropped; // already ARGB8888 + } else { + save_surf = rotate_surface_quarters(cropped, quarters); + SDL_FreeSurface(cropped); + if (!save_surf) { + printf("Failed to rotate cropped surface for saving.\n"); + return 0; + } + } + if (!save_surf) { + printf("Failed to prepare rotated surface for saving.\n"); + return 0; + } + + // Build output path based on original extension: /_rotated. + const char *orig_name = viewer->file_list.files[viewer->file_list.current_index]; + char name_wo_ext[MAX_PATH_LEN]; + size_t len = strlen(orig_name); + if (!safe_copy_string(name_wo_ext, sizeof name_wo_ext, orig_name, len)) { + SDL_FreeSurface(save_surf); + return 0; + } + const char *ext_ptr = strrchr(orig_name, '.'); + char ext_lower[16] = {0}; + if (ext_ptr && *(ext_ptr + 1) != '\0') { + ext_ptr++; // skip dot + size_t eLen = strlen(ext_ptr); + if (eLen >= sizeof(ext_lower)) eLen = sizeof(ext_lower) - 1; + for (size_t i = 0; i < eLen; ++i) { + char c = ext_ptr[i]; + if (c >= 'A' && c <= 'Z') c = (char)(c - 'A' + 'a'); + ext_lower[i] = c; + } + } else { + // default to png if no extension + strcpy(ext_lower, "png"); + } + + // Trim name_wo_ext at last dot to remove extension + char *dot = strrchr(name_wo_ext, '.'); + if (dot) *dot = '\0'; + + char out_path[MAX_PATH_LEN * 2]; + char fname[MAX_PATH_LEN]; + + // Decide saving function by extension; fallback to png if unsupported + int saved = 0; + int fallback_png = 0; + int any_trim = (viewer->trim_left | viewer->trim_right | viewer->trim_top | viewer->trim_bottom) != 0; + + if (strcmp(ext_lower, "png") == 0) { + int n = snprintf(fname, sizeof fname, "%s_%s.png", name_wo_ext, any_trim ? 
"trimmed" : "rotated"); + if (n >= 0 && (size_t)n < sizeof fname && + safe_format_path(out_path, sizeof out_path, viewer->file_list.base_dir, fname)) { + if (IMG_SavePNG(save_surf, out_path) == 0) saved = 1; + } + } else if (strcmp(ext_lower, "jpg") == 0 || strcmp(ext_lower, "jpeg") == 0) { + int n = snprintf(fname, sizeof fname, "%s_%s.%s", name_wo_ext, any_trim ? "trimmed" : "rotated", ext_lower); + if (n >= 0 && (size_t)n < sizeof fname && + safe_format_path(out_path, sizeof out_path, viewer->file_list.base_dir, fname)) { + if (IMG_SaveJPG(save_surf, out_path, 90) == 0) saved = 1; + } + } else if (strcmp(ext_lower, "bmp") == 0) { + int n = snprintf(fname, sizeof fname, "%s_%s.bmp", name_wo_ext, any_trim ? "trimmed" : "rotated"); + if (n >= 0 && (size_t)n < sizeof fname && + safe_format_path(out_path, sizeof out_path, viewer->file_list.base_dir, fname)) { + if (SDL_SaveBMP(save_surf, out_path) == 0) saved = 1; + } + } else { + // Unsupported original extension for saving -> fallback to PNG + int n = snprintf(fname, sizeof fname, "%s_%s.png", name_wo_ext, any_trim ? "trimmed" : "rotated"); + if (n >= 0 && (size_t)n < sizeof fname && + safe_format_path(out_path, sizeof out_path, viewer->file_list.base_dir, fname)) { + if (IMG_SavePNG(save_surf, out_path) == 0) { saved = 1; fallback_png = 1; } + } + } + + SDL_FreeSurface(save_surf); + if (!saved) { + printf("Failed to save rotated image (unsupported format or IO error).\n"); + return 0; + } + if (fallback_png) { + printf("Saved %s image (fallback PNG): %s\n", any_trim ? "trimmed" : "rotated", out_path); + } else { + printf("Saved %s image: %s\n", any_trim ? 
"trimmed" : "rotated", out_path); + } + return 1; } \ No newline at end of file diff --git a/PYTHON/.gitignore b/PYTHON/.gitignore new file mode 100644 index 0000000..291fd4f --- /dev/null +++ b/PYTHON/.gitignore @@ -0,0 +1,216 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml \ No newline at end of file diff --git a/PYTHON/extractLinks/main.py b/PYTHON/extractLinks/main.py new file mode 100644 index 0000000..0ad1ae7 --- /dev/null +++ b/PYTHON/extractLinks/main.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +""" +Extract hosts from href attributes in an HTML file and write them as *host* per line. + +Usage: + python main.py INPUT_HTML [OUTPUT_TXT] + +If OUTPUT_TXT is not provided, the script writes to _links.txt +alongside the input file. +""" + +from __future__ import annotations + +import argparse +import os +from html.parser import HTMLParser +from typing import List, Set +from urllib.parse import urlparse + + +class _HrefParser(HTMLParser): + def __init__(self) -> None: + super().__init__() + self.hrefs: List[str] = [] + + def handle_starttag(self, tag: str, attrs): # type: ignore[override] + # Collect any href attribute on any tag + for (k, v) in attrs: + if k.lower() == "href" and v is not None: + self.hrefs.append(v) + + +def extract_hosts_from_html(html_text: str) -> List[str]: + """Parse HTML text, extract href values, and return a list of hostnames. + + Rules: + - Only http/https URLs are considered. + - Output is the network location (host[:port]) without scheme or path. + - Duplicates are removed, preserving first-seen order. 
+ """ + parser = _HrefParser() + parser.feed(html_text) + + seen: Set[str] = set() + hosts: List[str] = [] + for href in parser.hrefs: + parsed = urlparse(href) + if parsed.scheme in {"http", "https"} and parsed.netloc: + host = parsed.netloc + if host not in seen: + seen.add(host) + hosts.append(host) + return hosts + + +def main() -> int: + ap = argparse.ArgumentParser(description="Extract hosts from hrefs in an HTML file.") + ap.add_argument("input_html", help="Path to input HTML file") + ap.add_argument( + "output_txt", + nargs="?", + help="Path to output text file (defaults to _links.txt in the same directory)", + ) + args = ap.parse_args() + + input_path = args.input_html + if not os.path.isfile(input_path): + raise SystemExit(f"Input file not found: {input_path}") + + out_path = args.output_txt + if not out_path: + base = os.path.splitext(os.path.basename(input_path))[0] + out_path = os.path.join(os.path.dirname(input_path), f"{base}_links.txt") + + with open(input_path, "r", encoding="utf-8", errors="ignore") as f: + html_text = f.read() + + hosts = extract_hosts_from_html(html_text) + + with open(out_path, "w", encoding="utf-8") as f: + for host in hosts: + f.write(f"*{host}*\n") + + print(f"Wrote {len(hosts)} host(s) to {out_path}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) + diff --git a/PYTHON/extractLinks/pytest.ini b/PYTHON/extractLinks/pytest.ini new file mode 100644 index 0000000..91f0d1d --- /dev/null +++ b/PYTHON/extractLinks/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -q diff --git a/PYTHON/extractLinks/run.sh b/PYTHON/extractLinks/run.sh new file mode 100755 index 0000000..8d7574a --- /dev/null +++ b/PYTHON/extractLinks/run.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Wrapper to run the extractor. 
Usage: +# ./run.sh path/to/input.html [output.txt] + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) + +if [[ $# -lt 1 || $# -gt 2 ]]; then + echo "Usage: $0 [output.txt]" >&2 + exit 1 +fi + +python3 "$SCRIPT_DIR/main.py" "$@" diff --git a/PYTHON/extractLinks/tests/sample2.html b/PYTHON/extractLinks/tests/sample2.html new file mode 100644 index 0000000..b5931f0 --- /dev/null +++ b/PYTHON/extractLinks/tests/sample2.html @@ -0,0 +1,15 @@ + + + + Sample 2 + + +

Links:

+ + + diff --git a/PYTHON/extractLinks/tests/test_main.py b/PYTHON/extractLinks/tests/test_main.py new file mode 100644 index 0000000..2eee282 --- /dev/null +++ b/PYTHON/extractLinks/tests/test_main.py @@ -0,0 +1,70 @@ +import os +import subprocess +import sys +from pathlib import Path + +import pytest + +# Allow importing from project root when running pytest from this folder +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) +SCRIPT = ROOT / "main.py" + + +def read_lines(p: Path): + return [l.rstrip("\n") for l in p.read_text(encoding="utf-8").splitlines()] + + +def test_extract_hosts_function(): + from main import extract_hosts_from_html + + html = ( + 'A' + 'B' + 'C' + 'D' + 'E' + ) + hosts = extract_hosts_from_html(html) + assert hosts == ["wiby.me", "example.com"], hosts + + +def test_cli_writes_expected_output(tmp_path: Path): + # copy sample1.html to tmpdir and run the script + sample = ROOT / "tests" / "sample1.html" + html_copy = tmp_path / "sample1.html" + html_copy.write_text(sample.read_text(encoding="utf-8"), encoding="utf-8") + + # Run CLI + out_file = tmp_path / "out.txt" + proc = subprocess.run( + [sys.executable, str(SCRIPT), str(html_copy), str(out_file)], + capture_output=True, + text=True, + check=True, + ) + assert out_file.exists() + + lines = read_lines(out_file) + # Expected order: first time we see wiby.me, then example.com + assert lines == ["*wiby.me*", "*example.com*"], lines + + +def test_cli_default_output_name(tmp_path: Path): + sample = ROOT / "tests" / "sample2.html" + html_copy = tmp_path / "sample2.html" + html_copy.write_text(sample.read_text(encoding="utf-8"), encoding="utf-8") + + proc = subprocess.run( + [sys.executable, str(SCRIPT), str(html_copy)], + capture_output=True, + text=True, + check=True, + ) + + default_out = tmp_path / "sample2_links.txt" + assert default_out.exists() + + lines = read_lines(default_out) + assert lines == ["*sub.domain.co.uk*", 
"*example.com:8080*"], lines