edits
This commit is contained in:
parent
251721f23f
commit
684b0c841c
@ -1,5 +1,7 @@
|
|||||||
services:
|
services:
|
||||||
mail_czsk:
|
mail_czsk:
|
||||||
|
stdin_open: true
|
||||||
|
tty: true
|
||||||
image: t0is/vat-mail-loader:latest
|
image: t0is/vat-mail-loader:latest
|
||||||
container_name: mail_czsk
|
container_name: mail_czsk
|
||||||
environment:
|
environment:
|
||||||
@ -15,6 +17,8 @@ services:
|
|||||||
- ./data/czsk:/data
|
- ./data/czsk:/data
|
||||||
|
|
||||||
mail_rcw:
|
mail_rcw:
|
||||||
|
stdin_open: true
|
||||||
|
tty: true
|
||||||
image: t0is/vat-mail-loader:latest
|
image: t0is/vat-mail-loader:latest
|
||||||
container_name: mail_rcw
|
container_name: mail_rcw
|
||||||
environment:
|
environment:
|
||||||
@ -27,6 +31,8 @@ services:
|
|||||||
- ./data/rcw:/data
|
- ./data/rcw:/data
|
||||||
|
|
||||||
mail_rcw_offers:
|
mail_rcw_offers:
|
||||||
|
stdin_open: true
|
||||||
|
tty: true
|
||||||
image: t0is/vat-mail-loader:latest
|
image: t0is/vat-mail-loader:latest
|
||||||
container_name: mail_rcw_offers
|
container_name: mail_rcw_offers
|
||||||
environment:
|
environment:
|
||||||
|
30
main.py
30
main.py
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
download_emails.py
|
download_emails
|
||||||
|
|
||||||
- Uses Gmail API to fetch all messages delivered to a single support address.
|
- Uses Gmail API to fetch all messages delivered to a single support address.
|
||||||
- Pulls configured Gmail signatures (in any language) via Settings API.
|
- Pulls configured Gmail signatures (in any language) via Settings API.
|
||||||
@ -19,8 +19,10 @@ import re
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
|
from google.auth.transport.requests import Request
|
||||||
from google_auth_oauthlib.flow import InstalledAppFlow
|
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||||
from googleapiclient.discovery import build
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
@ -58,16 +60,32 @@ NAME_PATTERNS = {
|
|||||||
|
|
||||||
def get_gmail_service(token_path: str):
|
def get_gmail_service(token_path: str):
|
||||||
"""
|
"""
|
||||||
Load OAuth client credentials and per-account token.
|
Load OAuth credentials and handle refreshing.
|
||||||
|
Performs interactive auth only when no valid token or usable refresh token is available.
|
||||||
"""
|
"""
|
||||||
creds = None
|
creds = None
|
||||||
|
# Load existing tokens
|
||||||
if os.path.exists(token_path):
|
if os.path.exists(token_path):
|
||||||
creds = Credentials.from_authorized_user_file(token_path, SCOPES)
|
creds = Credentials.from_authorized_user_file(token_path, SCOPES)
|
||||||
|
# Refresh if expired
|
||||||
|
if creds and creds.expired and creds.refresh_token:
|
||||||
|
logger.info("Refreshing access token using refresh token...")
|
||||||
|
creds.refresh(Request())
|
||||||
|
# If no valid credentials, do full auth flow
|
||||||
if not creds or not creds.valid:
|
if not creds or not creds.valid:
|
||||||
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
|
flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
|
||||||
creds = flow.run_local_server(port=0)
|
auth_url, _ = flow.authorization_url(access_type='offline', prompt='consent')
|
||||||
with open(token_path, 'w') as f:
|
logger.warning("Please open this URL in your browser:\n%s", auth_url)
|
||||||
f.write(creds.to_json())
|
sys.stdout.write("Enter the authorization code here: ")
|
||||||
|
sys.stdout.flush()
|
||||||
|
code = sys.stdin.readline().strip()
|
||||||
|
flow.fetch_token(code=code)
|
||||||
|
creds = flow.credentials
|
||||||
|
# Save for next time
|
||||||
|
with open(token_path, 'w') as token_file:
|
||||||
|
token_file.write(creds.to_json())
|
||||||
|
logger.info("Saved new token to %s", token_path)
|
||||||
|
# Build service
|
||||||
return build('gmail', 'v1', credentials=creds)
|
return build('gmail', 'v1', credentials=creds)
|
||||||
|
|
||||||
|
|
||||||
@ -116,13 +134,11 @@ def extract_author(body: str, signatures: list) -> str:
|
|||||||
sig = s.get('signature')
|
sig = s.get('signature')
|
||||||
if sig and sig in body:
|
if sig and sig in body:
|
||||||
return s['name']
|
return s['name']
|
||||||
|
|
||||||
# 2) Manual name patterns
|
# 2) Manual name patterns
|
||||||
for name, patterns in NAME_PATTERNS.items():
|
for name, patterns in NAME_PATTERNS.items():
|
||||||
for pat in patterns:
|
for pat in patterns:
|
||||||
if pat in body:
|
if pat in body:
|
||||||
return name
|
return name
|
||||||
|
|
||||||
# 3) Regex fallback
|
# 3) Regex fallback
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r'(?im)(?:Podpis|S pozdravem|Díky|Thanks|Regards|Best regards|Sincerely)[\s,]*\r?\n([^\r\n]{2,})',
|
r'(?im)(?:Podpis|S pozdravem|Díky|Thanks|Regards|Best regards|Sincerely)[\s,]*\r?\n([^\r\n]{2,})',
|
||||||
|
Loading…
Reference in New Issue
Block a user