You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Retry loop of an invoice-link processor. Indentation was lost in this paste,
# so the nesting described in these comments is inferred from statement order.
# Each attempt downloads `link`, hands the content to process_content(), then
# compares the extracted fields against recently stored invoices.
# Outcomes visible below:
#   - extracted_data        when no duplicate is detected
#   - a dict with "duplicate_content"/"duplicate_link" flags when one is
#   - {}                    on unsupported file type or on any failure
# NOTE(review): `retries`, `retry_delay`, `link`, `db`, `filename` and `url_link`
# are not defined in this fragment; presumably they come from the enclosing
# function - confirm.
for attempt in range(retries):
try:
# NOTE(review): no `timeout=` is passed, so a stalled server can block this call
# indefinitely. The call is also synchronous, while the `await` statements below
# show this code runs inside a coroutine - the event loop is blocked meanwhile.
response = requests.get(link)
# Turns HTTP error statuses into an exception handled by the RequestException
# branch further down.
response.raise_for_status()
file_bytes = response.content
# Extension = text after the last '.' anywhere in the link, lower-cased.
# NOTE(review): a query string or fragment after the file name ends up inside
# file_ext and would then fall into the "unsupported" branch.
file_ext = link.split('.')[-1].lower()
if file_ext in ('jpg', 'jpeg', 'png', 'jfif'):
# Images are decoded, converted to mode "L" and re-encoded as PNG before being
# wrapped by image_format() (helper defined elsewhere).
pil_image = PIL.Image.open(BytesIO(file_bytes))
pil_image = pil_image.convert("L")
image_bytes = BytesIO()
pil_image.save(image_bytes, format="PNG")
image_part = image_format(image_bytes.getvalue())
extracted_data = await process_content(image_part, db, filename=filename, url_link=url_link)
elif file_ext == 'pdf':
# PDFs are forwarded untouched as a mime-type/bytes dict.
pdf_part = {
"mime_type": "application/pdf",
"data": file_bytes
}
extracted_data = await process_content(pdf_part, db, filename=filename, url_link=url_link)
else:
# Any other extension is reported and abandoned immediately (no retry).
print(f"Unsupported file type in link: {link}")
return {}
# --- Content Duplicate Check against last 5 Days of Records ---
# Fields used as the duplicate key; extracted_data is read with .get(), so a
# missing key yields None (or "" for the amount).
invoice_no = extracted_data.get("Invoice No")
supplier_gstin_number = extracted_data.get("Supplier GSTIN")
# NOTE(review): an explicit None value becomes the truthy string "None" here.
invoice_amount_str = str(extracted_data.get("Total Amount", ""))
invoice_date = extracted_data.get("Invoice Date")
print(f"Extracted Data from process_invoice_link - process_content: Invoice No: {invoice_no}, GSTIN: {supplier_gstin_number}, Amount: {invoice_amount_str}, Date: {invoice_date}") # Enhanced Logging - Extracted from process_content
# The duplicate check only runs when all four key fields are truthy.
if invoice_no and supplier_gstin_number and invoice_amount_str and invoice_date:
five_days_ago = date.today() - timedelta(days=4) # Window start: 4 days back, i.e. 5 calendar days counting today
# Fetch invoices from the last 5 days
recent_invoices = (
db.query(models.InvoiceData)
.filter(models.InvoiceData.created_at >= five_days_ago) # Filter by date
.order_by(models.InvoiceData.id.desc())
.all()
)
print(f"Checking invoices from last 5 days. Date threshold: {five_days_ago}")
print(f"Number of recent invoices to check: {len(recent_invoices)}")
if not recent_invoices: # Explicitly check if recent_invoices is empty
print("No previous invoices found in last 5 days. Skipping duplicate content check against recent invoices for now.")
else:
# First pass: an existing record with the same link wins over content matching
# and is reported with both duplicate flags set. The comparison uses `link`,
# not the `url_link` value that was passed to process_content().
for existing_invoice in recent_invoices:
if existing_invoice.url_link == link: # Check for link duplicate first - now checked AFTER content processing
print(f"Link duplicate found in last 5 days for link: {link}")
return {
"duplicate_content": True,
"Invoice No": invoice_no,
"Supplier GSTIN": supplier_gstin_number,
"Total Amount": invoice_amount_str,
"Invoice Date": invoice_date,
"duplicate_link": True
}
# Second pass: same invoice number, GSTIN, date and amount => content duplicate.
# NOTE(review): amounts are compared as strings, so differently formatted but
# numerically equal values (e.g. "100.0" vs "100.00") will not match.
for existing_invoice in recent_invoices: # Content duplicate check if no link duplicate
existing_invoice_amount_str = str(existing_invoice.invoice_amount) if existing_invoice.invoice_amount is not None else "0" # Handle potential None values from DB
print(f"process_invoice_link - Comparing against DB record: Invoice No: {existing_invoice.invoice_no}, GSTIN: {existing_invoice.supplier_gstin_number}, Amount: {existing_invoice_amount_str}, Date: {existing_invoice.invoice_date}") # Enhanced Logging - DB Record in Loop
# Content Duplicate Check: Invoice No, Supplier GSTIN, Invoice Date, and Invoice Amount MUST match
if (existing_invoice.invoice_no == invoice_no and
existing_invoice.supplier_gstin_number == supplier_gstin_number and
existing_invoice.invoice_date == invoice_date and
existing_invoice_amount_str == invoice_amount_str): # String-to-String comparison
print(f"Content duplicate found for invoice: {invoice_no}, GSTIN: {supplier_gstin_number}, Date: {invoice_date}, Amount: {invoice_amount_str}") # Enhanced Logging - Content Duplicate Found
return {
"duplicate_content": True,
"Invoice No": invoice_no,
"Supplier GSTIN": supplier_gstin_number,
"Total Amount": invoice_amount_str,
"Invoice Date": invoice_date,
"duplicate_link": False
}
# No duplicate detected (or the check was skipped): hand back the extraction.
return extracted_data
# Network/HTTP failures are the only errors that trigger another attempt.
except requests.exceptions.RequestException as e:
print(f"Request error for link {link} (Attempt {attempt+1}/{retries}): {e}")
if attempt < retries - 1:
# NOTE(review): time.sleep() is a blocking call; inside a coroutine it stalls
# the whole event loop for retry_delay seconds.
time.sleep(retry_delay)
else:
print(f"Request failed after multiple retries for link {link}. Skipping.")
return {}
# Any other error (image decoding, extraction, DB access, ...) is logged and
# ends processing without a retry.
except Exception as e:
print(f"Error processing link {link}: {e}")
return {}
# Fallback result; presumably only reached when the loop finishes without
# returning (e.g. retries == 0) - exact placement is unclear without indentation.
return {}
The text was updated successfully, but these errors were encountered:
# POST handler registered at /extract-invoice (presumably FastAPI, given
# UploadFile/File/Form/Depends). It receives an uploaded file, a `username`
# form field and a DB session supplied through Depends(get_db).
# Only the first statement of the body is visible in this fragment.
@app.post("/extract-invoice")
async def extract_invoice(file: UploadFile = File(...), username: str = Form(...), db: Session = Depends(get_db)):
# File name as reported by the upload object.
filename = file.filename
# Coroutine that processes a single invoice link. Only the signature and the
# retry settings are visible in this fragment; the rest of the body is not
# shown here.
async def process_invoice_link(link: str, db: Session, filename: str, url_link: str):
# Maximum number of attempts - presumably the bound of the download retry loop.
retries = 10
# Pause between attempts - presumably seconds (if passed to time.sleep); confirm.
retry_delay = 5
The text was updated successfully, but these errors were encountered: