import re
from datetime import datetime

# Matches a run of exactly six digits delimited by word boundaries.
_ORDER_NUMBER_RE = re.compile(r'\b\d{6}\b')

# Formats are tried in order; the first one that parses wins.
_DATE_FORMATS = (
    "%Y-%m-%d %H:%M:%S",  # 2025-02-06 10:35:44
    "%d.%m.%Y %H:%M",     # 31.12.2024 21:17
    "%Y-%m-%d",           # 2025-02-06
    "%d-%m-%Y",           # 06-02-2025
    "%d.%m.%Y",           # 06.02.2025
    "%d.%m.%y",           # 06.02.25
)


def extract_numbers(text: str) -> str:
    """Return all digits found in *text*, concatenated in order.

    Args:
        text: The string to scan.

    Returns:
        A string made only of the digit characters of ``text``; the empty
        string when ``text`` contains no digits.
    """
    return ''.join(re.findall(r'\d+', text))


def extract_order_number(reference: str) -> str:
    """Return the first standalone 6-digit order number in *reference*.

    A number only counts when it is exactly six digits long and delimited by
    word boundaries, so a longer digit run such as ``1234567`` is not matched.

    Args:
        reference: The reference field to search.

    Returns:
        The 6-digit number as a string, or ``"0"`` when none is found.
    """
    match = _ORDER_NUMBER_RE.search(reference)
    return match.group(0) if match else "0"


def parse_date(date_str: str) -> datetime:
    """Parse *date_str* by trying each supported date format in turn.

    Surrounding whitespace is ignored, so padded values (for example a CSV
    field with a trailing newline) parse the same as their trimmed form.

    Args:
        date_str: The textual date, in one of the formats listed in
            ``_DATE_FORMATS``.

    Returns:
        The parsed naive ``datetime``. When no format matches, the current
        local date and time is returned instead of raising.

    Raises:
        TypeError: If ``date_str`` is not a string (raised by ``strptime``).
    """
    # Only strings are trimmed; other types are passed through unchanged so
    # strptime reports them with its usual TypeError.
    candidate = date_str.strip() if isinstance(date_str, str) else date_str

    for fmt in _DATE_FORMATS:
        try:
            return datetime.strptime(candidate, fmt)
        except ValueError:
            continue  # Try the next format

    # Best-effort fallback: unsupported formats yield "now" rather than an
    # error, so callers must not assume the result came from the input.
    return datetime.today()