# Web-viewer header captured together with the file (not part of the program):
# Space status: Sleeping | File size: 3,922 bytes | Commit: e69d432
import imaplib
import email
import os
import time
import requests
# --- Configuration ----------------------------------------------------------
# Every setting can be overridden with an environment variable of the same
# name. The literals are fallbacks that keep the previous hard-coded behaviour
# when the variable is not set.
MAIL_SERVER = os.environ.get("MAIL_SERVER", 'imap.gmail.com')
EMAIL_USER = os.environ.get("EMAIL_USER", '[email protected]')
# SECURITY: a mailbox password must not live in source control. The fallback
# below is kept only for backward compatibility; rotate this credential and
# supply the new one through the EMAIL_PASS environment variable instead.
EMAIL_PASS = os.environ.get("EMAIL_PASS", 'aymw wxrh fmea bxfz')
MAIL_BOX = os.environ.get("MAIL_BOX", 'INBOX')
DOWNLOAD_FOLDER = os.environ.get("DOWNLOAD_FOLDER", "./downloaded_pdfs")
BACKEND_UPLOAD_URL = os.environ.get(
    "BACKEND_UPLOAD_URL", "http://127.0.0.1:8000/documents/upload"
)

# Attachments are written here temporarily before being uploaded.
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
def upload_file_to_backend(file_path, timeout=(10, 300)):
    """
    Upload a PDF to the backend; the server handles setting the defaults.

    Args:
        file_path: Path of the PDF on disk. Only its base name is sent as the
            multipart filename.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.post`` so that an unresponsive backend cannot block
            the caller forever.

    Returns:
        True if the request completed without an HTTP error status, False if
        the request failed or the server answered with an error status.

    Raises:
        OSError: If ``file_path`` cannot be opened for reading.
    """
    file_name = os.path.basename(file_path)
    print(f"--- Uploading {file_name} to the server... ---")
    try:
        with open(file_path, "rb") as f:
            # Only the file itself is sent; there is no extra form payload.
            files = {"file": (file_name, f, "application/pdf")}
            response = requests.post(BACKEND_UPLOAD_URL, files=files, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"❌ ERROR: Failed to upload {file_name}.")
        print(f"Reason: {e}")
        # A Response object is falsy for 4xx/5xx statuses, so it has to be
        # compared against None; a plain truth test would skip exactly the
        # error responses whose body we want to show.
        if e.response is not None:
            print("--- Server's Detailed Error Response ---")
            try:
                print(e.response.json())
            except ValueError:
                # Body is not JSON; fall back to the raw text.
                print(e.response.text)
            print("------------------------------------")
        return False
    print(f"✅ Successfully uploaded and processed {file_name}.")
    return True
def download_and_process_attachments(mail):
    """
    Search for UNSEEN emails, download their PDF attachments and upload them.

    Each PDF attachment is written to DOWNLOAD_FOLDER, passed to
    ``upload_file_to_backend`` and then removed again, whether or not the
    upload succeeded.

    Args:
        mail: A logged-in IMAP connection offering ``select``, ``search`` and
            ``fetch`` (for example ``imaplib.IMAP4_SSL``).

    Returns:
        None. Progress and errors are reported on stdout.
    """
    mail.select(MAIL_BOX)
    # Only messages that have not been seen yet are processed.
    status, mails = mail.search(None, 'UNSEEN')
    if status != 'OK':
        print("Error searching for emails.")
        return
    email_ids = mails[0].split()
    if not email_ids:
        print("No new emails found.")
        return
    print(f"Found {len(email_ids)} new emails to process.")
    for eid in email_ids:
        status, msg_data = mail.fetch(eid, "(RFC822)")
        # A failed or empty fetch carries no (envelope, body) tuple; skip the
        # message instead of crashing on the index below.
        if status != 'OK' or not msg_data or not isinstance(msg_data[0], tuple):
            print(f"Error fetching email {eid!r}; skipping.")
            continue
        msg = email.message_from_bytes(msg_data[0][1])
        for part in msg.walk():
            # Skip container parts and parts without a Content-Disposition.
            if part.get_content_maintype() == 'multipart' or part.get('Content-Disposition') is None:
                continue
            filename = part.get_filename()
            if not filename or not filename.lower().endswith(".pdf"):
                continue
            # The filename is chosen by the sender. Keep only its final
            # component (treating both slash styles as separators) so it can
            # never point outside DOWNLOAD_FOLDER.
            safe_name = os.path.basename(filename.replace("\\", "/"))
            payload = part.get_payload(decode=True)
            if payload is None:
                # get_payload(decode=True) returns None when the part has no
                # decodable body; there is nothing to save in that case.
                print(f"Skipping attachment without content: {safe_name}")
                continue
            filepath = os.path.join(DOWNLOAD_FOLDER, safe_name)
            try:
                # 1. DOWNLOAD the file to a temporary location
                with open(filepath, 'wb') as f:
                    f.write(payload)
                print(f"Downloaded temporary file: {filepath}")
                # 2. UPLOAD the file to the backend (it reports its own result)
                upload_file_to_backend(filepath)
            finally:
                # 3. CLEAN UP the temporary file, even if step 1 or 2 raised
                try:
                    os.remove(filepath)
                    print(f"Cleaned up temporary file: {filepath}")
                except OSError as e:
                    print(f"Error during file cleanup: {e}")
# Entry point: poll the mailbox once a minute and reconnect after any failure.
if __name__ == "__main__":
    while True:
        mail = None
        try:
            mail = imaplib.IMAP4_SSL(MAIL_SERVER)
            mail.login(EMAIL_USER, EMAIL_PASS)
            print("✅ Logged in successfully. Starting to monitor for new emails...")
            while True:
                download_and_process_attachments(mail)
                print("--- Waiting for 60 seconds before next check... ---")
                time.sleep(60)
        except Exception as e:
            # Top-level boundary: report the failure and fall through to the
            # reconnect below instead of terminating the monitor.
            print(f"An unexpected error occurred: {e}")
        finally:
            # Release the old connection before a new one is opened so that
            # repeated failures do not pile up open sockets.
            if mail is not None:
                try:
                    mail.logout()
                except (imaplib.IMAP4.error, OSError):
                    # Best effort: the connection may already be broken.
                    pass
        print("--- Attempting to reconnect in 60 seconds... ---")
        time.sleep(60)