mirror of
https://github.com/kuhyx/testsAndMisc.git
synced 2026-07-04 14:43:01 +02:00
Improve fetch_license_plates: add constants and update User-Agent
Co-authored-by: kuhyx <147418882+kuhyx@users.noreply.github.com>
This commit is contained in:
parent
d050e004d3
commit
2ce6a74a65
@ -35,6 +35,15 @@ except ImportError:
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# Constants
|
||||
MIN_TABLE_COLUMNS = 2 # Minimum columns needed to extract code and location
|
||||
MAX_CODE_LENGTH = 4 # Maximum length for a valid license plate code
|
||||
USER_AGENT = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120.0.0.0 Safari/537.36" # Updated to recent version
|
||||
)
|
||||
|
||||
|
||||
def fetch_wikipedia_license_plates() -> dict[str, str]:
|
||||
"""Fetch Polish license plate codes from Wikipedia.
|
||||
@ -46,11 +55,7 @@ def fetch_wikipedia_license_plates() -> dict[str, str]:
|
||||
RuntimeError: If the page cannot be fetched or parsed.
|
||||
"""
|
||||
url = "https://en.wikipedia.org/wiki/Vehicle_registration_plates_of_Poland"
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/91.0.4472.124 Safari/537.36"
|
||||
}
|
||||
headers = {"User-Agent": USER_AGENT}
|
||||
|
||||
sys.stdout.write(f"Fetching data from {url}...\n")
|
||||
|
||||
@ -83,7 +88,7 @@ def fetch_wikipedia_license_plates() -> dict[str, str]:
|
||||
for row in rows[1:]: # Skip header row
|
||||
cells = row.find_all(["td", "th"])
|
||||
|
||||
if len(cells) >= 2: # noqa: PLR2004
|
||||
if len(cells) >= MIN_TABLE_COLUMNS:
|
||||
# Extract code and location
|
||||
code_text = cells[0].get_text(strip=True)
|
||||
location_text = cells[1].get_text(strip=True)
|
||||
@ -92,7 +97,7 @@ def fetch_wikipedia_license_plates() -> dict[str, str]:
|
||||
code = re.sub(r"[^A-Z]", "", code_text.upper())
|
||||
|
||||
# Skip if code is invalid
|
||||
if not code or len(code) > 4: # noqa: PLR2004
|
||||
if not code or len(code) > MAX_CODE_LENGTH:
|
||||
continue
|
||||
|
||||
# Clean up location text (remove citations, extra spaces)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user