init commit
This commit is contained in:
commit
f77edddfaa
3
.idea/.gitignore
generated
vendored
Normal file
3
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
8
.idea/mail_loader.iml
generated
Normal file
8
.idea/mail_loader.iml
generated
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
7
.idea/misc.xml
generated
Normal file
7
.idea/misc.xml
generated
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.13" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/mail_loader.iml" filepath="$PROJECT_DIR$/.idea/mail_loader.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
1
credentials.json
Normal file
1
credentials.json
Normal file
@ -0,0 +1 @@
|
||||
{"installed":{"client_id":"725019411610-rkg50rru2dklsqdapvekdustsrfe0099.apps.googleusercontent.com","project_id":"phonic-entity-272321","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"aWJYZre_RE55kM64Kf1j_cZB","redirect_uris":["http://localhost"]}}
|
0
data/czsk/.keep
Normal file
0
data/czsk/.keep
Normal file
0
data/rcw/.keep
Normal file
0
data/rcw/.keep
Normal file
0
data/rcw_offers/.keep
Normal file
0
data/rcw_offers/.keep
Normal file
45
docker-compose.yml
Normal file
45
docker-compose.yml
Normal file
@ -0,0 +1,45 @@
|
||||
version: "3.8"

# One container per mailbox. All three share the same image and OAuth client
# credentials; they differ only in the mailbox address, the token file and the
# output directory.
services:
  mail_czsk:
    # The Dockerfile lives in docker/, while requirements.txt and main.py are
    # copied from the repository root, so the context stays "." and the
    # Dockerfile path is given explicitly.
    build:
      context: .
      dockerfile: docker/Dockerfile
    container_name: mail_czsk
    environment:
      - SUPPORT_ADDRESS=dalkove.ovladace@gmail.com
      - TOKEN_PATH=/app/tokens/token_czsk.json
      - OUTPUT_DIR=/data
    volumes:
      # Shared OAuth client credentials
      - ./credentials.json:/app/credentials.json:ro
      # Per-account token storage
      - ./tokens:/app/tokens
      # Where JSON outputs go
      - ./data/czsk:/data

  mail_rcw:
    build:
      context: .
      dockerfile: docker/Dockerfile
    container_name: mail_rcw
    environment:
      - SUPPORT_ADDRESS=remote.control.world1@gmail.com
      - TOKEN_PATH=/app/tokens/token_rcw.json
      - OUTPUT_DIR=/data
    volumes:
      - ./credentials.json:/app/credentials.json:ro
      - ./tokens:/app/tokens
      - ./data/rcw:/data

  mail_rcw_offers:
    build:
      context: .
      dockerfile: docker/Dockerfile
    container_name: mail_rcw_offers
    environment:
      - SUPPORT_ADDRESS=rcw.offline@gmail.com
      - TOKEN_PATH=/app/tokens/token_rcw_offers.json
      - OUTPUT_DIR=/data
    volumes:
      - ./credentials.json:/app/credentials.json:ro
      - ./tokens:/app/tokens
      - ./data/rcw_offers:/data

# Optional: put all three on the same network
networks:
  default:
    driver: bridge
|
24
docker/Dockerfile
Normal file
24
docker/Dockerfile
Normal file
@ -0,0 +1,24 @@
|
||||
# Use a slim Python base
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Compiler toolchain and MariaDB client headers, required to build the
# `mariadb` package listed in requirements.txt.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        libmariadb-dev-compat \
        libmariadb-dev \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy code only. credentials.json is deliberately NOT copied into the image:
# it holds the OAuth client secret and is bind-mounted read-only at
# /app/credentials.json by docker-compose.yml, so baking it in would only leak
# it into the image layers.
COPY main.py .

# Entrypoint: reads SUPPORT_ADDRESS, TOKEN_PATH, OUTPUT_DIR env vars.
# Exec form without a shell wrapper so Python runs as PID 1 and receives
# stop signals directly.
ENTRYPOINT ["python", "main.py"]
|
215
main.py
Normal file
215
main.py
Normal file
@ -0,0 +1,215 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
main.py
|
||||
|
||||
- Uses Gmail API to fetch all messages delivered to a single support address.
|
||||
- Pulls configured Gmail signatures (in any language) via Settings API.
|
||||
- Parses out headers, body, and matches against signatures or falls back to regex or manual name mapping.
|
||||
- Tallies per-day sent vs. received counts and per-author sent email counts.
|
||||
- Writes each month's emails into JSON files under OUTPUT_DIR/YYYY/MM/emails.json.
|
||||
- Writes overall stats into OUTPUT_DIR as JSON files.
|
||||
- Outputs progress logs to stderr (the default stream of logging.basicConfig) for Docker visibility.
|
||||
- (Commented-out) Stubs for loading the daily/author stats into MariaDB.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import base64
|
||||
import re
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
import logging
|
||||
|
||||
from google.oauth2.credentials import Credentials
|
||||
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||
from googleapiclient.discovery import build
|
||||
|
||||
# Progress logging: timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# OAuth scope requested for the Gmail API
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

# Runtime configuration, overridable through environment variables
SUPPORT_ADDRESS = os.getenv('SUPPORT_ADDRESS', 'remote.control.world1@gmail.com')
TOKEN_PATH = os.getenv('TOKEN_PATH', 'tokens/test.json')
OUTPUT_DIR = os.getenv('OUTPUT_DIR', 'data')

# A message whose From header contains any of these (lower-cased) addresses
# is counted as sent rather than received.
SENDER_ALIASES = [
    SUPPORT_ADDRESS.lower(),
    'orders@remote-control-world.eu',
]

# Fallback author lookup used when no configured signature matches:
# author label -> substrings searched for in the message body.
# Entries are tried in the order written here and the first hit wins.
NAME_PATTERNS = {
    'Josefina': ['Josefína Bartková'],
    'Ondra': [
        'Kateřina Kulhánková',
        'Kateřina Kulhánkova',
        'Ondrej',
        'Ondřej',
    ],
    'Honza': ['Jan Klus'],
    'Halina': ['Halina Kutláková'],
    'Helena': ['Helena Urbášková'],
    'AdamH': ['Adam Holuša'],
    'AdamK': ['Adam Kulhánek'],
    'KubaS': ['Jakub Sopuch'],
    'LukasZ': ['Lukáš Zdražila'],
    'Breta': ['Břetislav Střaslička'],
    'TerkaP': ['Tereza Pěchulová'],
    'Marketa': ['Marketa', 'Markéta'],
}
|
||||
|
||||
|
||||
def get_gmail_service(token_path: str):
    """
    Build an authorized Gmail API client for one mailbox.

    The cached user token is loaded from ``token_path`` when that file exists.
    If the token is expired but carries a refresh token it is renewed without
    user interaction; only when no usable token can be obtained is the
    interactive installed-app flow started, using ``credentials.json`` from
    the current working directory. A refreshed or newly obtained token is
    written back to ``token_path``.

    Args:
        token_path: Path of the per-account token JSON file.

    Returns:
        The Gmail v1 service object returned by ``build``.
    """
    # Imported locally so this fix does not depend on the file's import block.
    from google.auth.exceptions import RefreshError
    from google.auth.transport.requests import Request

    creds = None
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            # Access tokens are short-lived; renewing them silently is what
            # lets the job run unattended (e.g. inside a container).
            try:
                creds.refresh(Request())
                refreshed = True
            except RefreshError as exc:
                logger.warning(f"Token refresh failed, falling back to interactive login: {exc}")

        if not refreshed:
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)

        # The token directory may not exist yet on a first run.
        token_dir = os.path.dirname(token_path)
        if token_dir:
            os.makedirs(token_dir, exist_ok=True)
        with open(token_path, 'w') as f:
            f.write(creds.to_json())

    return build('gmail', 'v1', credentials=creds)
|
||||
|
||||
|
||||
def extract_body(payload) -> str:
    """
    Return the first text/plain body found in a Gmail message payload.

    The payload is searched depth-first: the part itself is used when its
    ``mimeType`` is ``text/plain`` and it carries ``body.data``; otherwise its
    ``parts`` are searched in order.

    Args:
        payload: A message part dict (``mimeType``, ``body``, ``parts``).
            ``None`` or an empty dict is treated as "no body".

    Returns:
        The decoded text (UTF-8, undecodable bytes dropped), or ``''`` when no
        non-empty text/plain part exists.
    """
    if not payload:
        return ''

    data = (payload.get('body') or {}).get('data')
    if payload.get('mimeType') == 'text/plain' and data:
        # base64url data may arrive without its trailing '=' padding, which
        # urlsafe_b64decode rejects; restore it before decoding.
        padded = data + '=' * (-len(data) % 4)
        return base64.urlsafe_b64decode(padded).decode('utf-8', errors='ignore')

    # `parts` may be absent or explicitly null.
    for part in payload.get('parts') or []:
        text = extract_body(part)
        if text:
            return text
    return ''
|
||||
|
||||
|
||||
def load_signatures(service):
    """
    Fetch the signatures configured for the account's send-as addresses.

    Each signature is reduced to plain text: tags are removed first, then
    HTML entities (``&amp;``, ``&nbsp;``, ...) are decoded so the text can be
    compared against a text/plain message body. Entries whose signature is
    empty after this conversion are skipped.

    Args:
        service: Gmail API service object.

    Returns:
        A list of dicts with keys ``email``, ``name`` (display name, or the
        address when no display name is set) and ``signature``.
    """
    # Imported locally so this fix does not depend on the file's import block.
    import html as html_lib

    sigs = []
    resp = service.users().settings().sendAs().list(userId='me').execute()
    for entry in resp.get('sendAs', []):
        markup = entry.get('signature') or ''
        # Strip tags before decoding entities so an escaped "&lt;b&gt;" in the
        # signature text is not mistaken for markup.
        text = html_lib.unescape(re.sub(r'<[^>]+>', '', markup)).strip()
        if text:
            sigs.append({
                'email': entry['sendAsEmail'],
                'name': entry.get('displayName') or entry['sendAsEmail'],
                'signature': text
            })
    logger.info(f"Loaded {len(sigs)} configured signatures from Gmail settings")
    return sigs
|
||||
|
||||
|
||||
def extract_author(body: str, signatures: list) -> str:
    """
    Work out who wrote a message from its body text.

    Three strategies are tried in order and the first hit is returned:
      1) a configured signature block occurring verbatim in the body
         -> that signature's ``name``
      2) a substring listed in NAME_PATTERNS -> the associated label
      3) the line following a common sign-off phrase (regex)

    Returns None when none of the strategies matches.
    """
    # Strategy 1: configured signature blocks
    for entry in signatures:
        block = entry.get('signature')
        if not block or block not in body:
            continue
        return entry['name']

    # Strategy 2: manual name mapping
    for label, variants in NAME_PATTERNS.items():
        if any(variant in body for variant in variants):
            return label

    # Strategy 3: sign-off phrase followed by a name on the next line
    sign_off = re.compile(
        r'(?im)(?:Podpis|S pozdravem|Díky|Thanks|Regards|Best regards|Sincerely)[\s,]*\r?\n([^\r\n]{2,})'
    )
    found = sign_off.search(body)
    return found.group(1).strip() if found else None
|
||||
|
||||
|
||||
def main():
    """
    Export the mailbox to per-month JSON files and write traffic statistics.

    Steps:
      1. Authorize against Gmail (TOKEN_PATH) and load configured signatures.
      2. Page through all messages matching the search query, fetching each
         one in full.
      3. For every message: extract headers, plain-text body and author,
         classify it as sent (From header contains one of SENDER_ALIASES) or
         received, and update the per-day and per-author counters.
      4. Write OUTPUT_DIR/YYYY/MM/emails.json for each month encountered, plus
         OUTPUT_DIR/daily_counts.json and OUTPUT_DIR/author_counts.json.

    Day and month buckets are derived from the message's ``internalDate`` via
    ``datetime.fromtimestamp``, i.e. in the process's local time zone.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    service = get_gmail_service(TOKEN_PATH)
    signatures = load_signatures(service)

    monthly_emails = defaultdict(list)
    daily_counts = defaultdict(lambda: {'sent': 0, 'received': 0})
    author_counts = defaultdict(int)

    logger.info(f"Starting fetch for mailbox: {SUPPORT_ADDRESS}")
    next_page_token = None
    page_count = 0
    total_msgs = 0

    while True:
        page_count += 1
        resp = service.users().messages().list(
            userId='me', q='after:2025-03-01', pageToken=next_page_token, maxResults=500
        ).execute()
        messages = resp.get('messages', [])
        count = len(messages)
        total_msgs += count
        logger.info(f"Page {page_count}: fetched {count} messages (total so far: {total_msgs})")

        if not messages:
            break

        for meta in messages:
            msg = service.users().messages().get(
                userId='me', id=meta['id'], format='full'
            ).execute()
            payload = msg.get('payload') or {}
            # Mail header names are case-insensitive ("From", "from", "FROM"),
            # so index them by lower-cased name to avoid silently losing
            # sender/recipient/subject for messages with unusual casing.
            headers = {h['name'].lower(): h['value'] for h in payload.get('headers', [])}
            body = extract_body(payload)
            author = extract_author(body, signatures)

            # internalDate is epoch milliseconds as a string.
            dt = datetime.fromtimestamp(int(msg['internalDate']) / 1000)
            year, month, day = dt.strftime('%Y'), dt.strftime('%m'), dt.strftime('%Y-%m-%d')
            from_hdr = headers.get('from', '').lower()
            is_sent = any(alias in from_hdr for alias in SENDER_ALIASES)

            if is_sent:
                daily_counts[day]['sent'] += 1
                # Authors are only tallied for outgoing mail.
                if author:
                    author_counts[author] += 1
            else:
                daily_counts[day]['received'] += 1

            monthly_emails[(year, month)].append({
                'id': msg['id'], 'threadId': msg.get('threadId'), 'source': SUPPORT_ADDRESS,
                'from': headers.get('from'), 'to': headers.get('to'), 'date': headers.get('date'),
                'subject': headers.get('subject'), 'internalDate': msg.get('internalDate'),
                'body': body, 'author': author, 'sent': is_sent
            })

        next_page_token = resp.get('nextPageToken')
        if not next_page_token:
            break

    logger.info(f"Fetch complete: total messages retrieved: {total_msgs}")

    # Write monthly files
    for (yr, mo), emails in monthly_emails.items():
        path = os.path.join(OUTPUT_DIR, yr, mo)
        os.makedirs(path, exist_ok=True)
        with open(os.path.join(path, 'emails.json'), 'w', encoding='utf-8') as f:
            json.dump(emails, f, ensure_ascii=False, indent=2)
        logger.info(f"Wrote {len(emails)} emails to {yr}/{mo}/emails.json")

    # Write stats
    with open(os.path.join(OUTPUT_DIR, 'daily_counts.json'), 'w') as f:
        json.dump(daily_counts, f, indent=2)
    with open(os.path.join(OUTPUT_DIR, 'author_counts.json'), 'w') as f:
        json.dump(author_counts, f, indent=2)

    logger.info(f"Processed {total_msgs} messages into {len(monthly_emails)} month folders under {OUTPUT_DIR}")
||||
# MariaDB stubs commented out
|
||||
|
||||
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@ -0,0 +1,5 @@
|
||||
google-api-python-client>=2.70.0
|
||||
google-auth>=2.20.0
|
||||
google-auth-oauthlib>=0.8.0
|
||||
google-auth-httplib2>=0.1.0
|
||||
mariadb
|
1
tokens/token_czsk.json
Normal file
1
tokens/token_czsk.json
Normal file
@ -0,0 +1 @@
|
||||
{"token": "ya29.A0AS3H6NxX5_wFQ28hw5Wvy34_Kr2dJ_qpGSpMB86BQSUHMMzPeFaZFP8DhwcDwJUuej8jxt1Z6sPFmpnFNyJTqXy40h8PFfrAj0N6GKLXbkswHCCDuH2FNT7zgTU-q1wVZfheDqlomIXt6ukLNVwFWdn9xhlrFhbqMdgPd7aZFH9GPR8IQ61knft3AWHECfcYytcpODgaCgYKAfkSARESFQHGX2MiOVdSBUIU6OxpKQ-ewhvXHQ0206", "refresh_token": "1//09WgPH6MscuSGCgYIARAAGAkSNwF-L9IrtR_J0YXxSAE-7quKW77B2QzGTzvIpdnrVy2I2w-_Ug42SDRFbzb3FvhN4RpLgVrFDyw", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "725019411610-rkg50rru2dklsqdapvekdustsrfe0099.apps.googleusercontent.com", "client_secret": "aWJYZre_RE55kM64Kf1j_cZB", "scopes": ["https://www.googleapis.com/auth/gmail.readonly"], "universe_domain": "googleapis.com", "account": "", "expiry": "2025-07-18T13:40:45.787490Z"}
|
1
tokens/token_rcw.json
Normal file
1
tokens/token_rcw.json
Normal file
@ -0,0 +1 @@
|
||||
{"token": "ya29.a0AS3H6NzeWcZMQcPsi1jy_NjB7zmmAJBlqvi0ua6m5RDob-TDnKLMqxWtIch1IlKZUPBqopiqsBTk60D88mbWci1l1gpfg6a4BZPYfjRilRizauvGwhmdBprCBTD8roR51AtsOjbJDyu6mV-6tyO7Sw4p7T1TZOFnD7pusaIzaCgYKAd8SARMSFQHGX2MiotN4orz1RRESdeSjksyeoA0175", "refresh_token": "1//09dd8-QNFUgKBCgYIARAAGAkSNwF-L9Iro08y7q9PmEZXCWIjykiIINgsK0wE-tcZvmQvaPOV3XZ5EjJuSdfxHuVte-yA2UEP4T4", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "725019411610-rkg50rru2dklsqdapvekdustsrfe0099.apps.googleusercontent.com", "client_secret": "aWJYZre_RE55kM64Kf1j_cZB", "scopes": ["https://www.googleapis.com/auth/gmail.readonly"], "universe_domain": "googleapis.com", "account": "", "expiry": "2025-07-18T13:09:32.987158Z"}
|
1
tokens/token_rcw_offers.json
Normal file
1
tokens/token_rcw_offers.json
Normal file
@ -0,0 +1 @@
|
||||
{"token": "ya29.a0AS3H6NxUCppUuFN_LhoZnpuuD6IZTl-u8DgCKop7SwRd6kh7ENotgfOMzrB5nDtXB12vVA0Ko19MErZWCjAo3oL5EFcJt6-MgZyfHtx0nAfolICT4xTqzHbGnCwVRHoHaCRHNw_z7ZluK-a8QWCv1RVjEC7mJ9eaU-cDTqnJaCgYKASYSARYSFQHGX2MiKy4I0lPP4dsMNSdDdHAqww0175", "refresh_token": "1//09tZ1D9cViQReCgYIARAAGAkSNwF-L9IrpGT7p2Wkd3dlLINt5SwkkUEbjvlx3yDqXlohoi0z_evodLpu8M3Jv7EXbYAF1toQDNc", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "725019411610-rkg50rru2dklsqdapvekdustsrfe0099.apps.googleusercontent.com", "client_secret": "aWJYZre_RE55kM64Kf1j_cZB", "scopes": ["https://www.googleapis.com/auth/gmail.readonly"], "universe_domain": "googleapis.com", "account": "", "expiry": "2025-07-18T13:40:22.079227Z"}
|
Loading…
Reference in New Issue
Block a user