2025-11-30 14:45:55 +01:00
|
|
|
"""Download cat images from TheCatAPI.

Fetches cat images in batches and saves them to a local directory.
"""
|
|
|
|
|
|
2025-11-30 13:42:16 +01:00
|
|
|
import json
|
2025-11-30 14:36:13 +01:00
|
|
|
import logging
|
2024-09-26 19:22:43 +02:00
|
|
|
from pathlib import Path
|
|
|
|
|
|
2025-11-30 13:42:16 +01:00
|
|
|
import requests
|
|
|
|
|
|
2025-11-30 21:59:24 +01:00
|
|
|
# Module-level logger, named after this module so handlers/filters can target it.
_logger = logging.getLogger(__name__)

# Upper bound on the number of search requests sent to the API in one run.
MAX_REQUESTS = 90

# Timeout, in seconds, applied to every HTTP request made by this script.
REQUEST_TIMEOUT = 30
|
2025-11-30 15:01:14 +01:00
|
|
|
|
2025-11-30 21:29:03 +01:00
|
|
|
|
|
|
|
|
def _download_single_image(url: str, output_dir: str = "./CATS2/") -> None:
    """Download and save a single image from URL.

    This is a best-effort operation: network and file-system failures are
    logged and the function returns normally, so one bad image does not
    abort the batch that is being processed.

    Args:
        url: The URL of the image to download.
        output_dir: Directory the image is written into. It is created if
            it does not exist. Defaults to ``./CATS2/``.
    """
    if not url:
        _logger.warning("Skipping image with missing URL")
        return

    # Use only the path part of the URL for the file name: a query string or
    # fragment would otherwise end up in the name on disk.
    image_name = Path(url.split("#", 1)[0].split("?", 1)[0]).name
    if not image_name:
        _logger.warning("Cannot derive a file name from %s", url)
        return

    # Keep the try body limited to the calls that can raise RequestException.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()  # Raise an exception for HTTP errors
    except requests.exceptions.RequestException:
        _logger.exception("Failed to download %s", url)
        return

    image_path = Path(output_dir) / image_name
    try:
        # Do not rely on the caller having created the directory.
        image_path.parent.mkdir(parents=True, exist_ok=True)
        image_path.write_bytes(response.content)
    except OSError:
        _logger.exception("Failed to save %s as %s", url, image_path)
        return

    _logger.info("Saved %s as %s", url, image_path)
|
2025-11-30 21:29:03 +01:00
|
|
|
|
|
|
|
|
|
2024-09-26 19:22:43 +02:00
|
|
|
# Create the output directory once, up front, instead of on every batch.
Path("./CATS2").mkdir(parents=True, exist_ok=True)

requests_send = 0
while requests_send < MAX_REQUESTS:
    # Count the attempt before issuing it, so a failing request still uses up
    # budget and the loop is guaranteed to terminate.
    requests_send += 1

    try:
        # NOTE(review): the api_key query parameter is sent empty here;
        # confirm whether a key should be supplied.
        res = requests.get(
            "https://api.thecatapi.com/v1/images/search?limit=100&api_key=",
            timeout=REQUEST_TIMEOUT,
        )
        res.raise_for_status()
        response = json.loads(res.text)
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers json.JSONDecodeError for a non-JSON body.
        _logger.exception(
            "Search request %d of %d failed", requests_send, MAX_REQUESTS
        )
        continue

    if not isinstance(response, list):
        _logger.warning(
            "Unexpected search response on request %d: %r",
            requests_send,
            response,
        )
        continue

    # Skip entries without a usable "url" so the downloader never sees None.
    urls = [
        cat["url"]
        for cat in response
        if isinstance(cat, dict) and cat.get("url")
    ]

    for url in urls:
        _download_single_image(url)
|