import requests
import json
# --- Configuration: replace the placeholders below before running ---
# The API key is sent as a Bearer token. The placeholder is a quoted string so
# the script still parses before it has been customised.
VA_API_KEY = "<YOUR_VA_API_KEY>"  # Replace with your API key
headers = {"Authorization": f"Bearer {VA_API_KEY}"}
# Endpoint the script posts the document to.
url = "https://api.va.landing.ai/v1/tools/agentic-document-analysis"
base_pdf_path = "your_pdf_path"  # Replace with the directory containing the file
pdf_name = "filename.pdf"  # Replace with the name of the PDF file
# Full path of the PDF that is opened and uploaded.
pdf_path = f"{base_pdf_path}/{pdf_name}"
# Classification schema: a JSON-Schema-style object with a single required
# string field, `document_type`, restricted to the listed labels.
class_schema = {
    "type": "object",
    "properties": {
        "document_type": {
            "type": "string",
            # Closed set of labels the classification step may return.
            "enum": [
                "Passport",
                "Invoice",
                "Other",
            ],
        },
    },
    "required": [
        "document_type",
    ],
}
# First request: classification.
# Upload the PDF together with the classification schema and read back the
# `document_type` value from the response.
with open(pdf_path, "rb") as f:
    # The POST is issued inside the `with` block because requests reads the
    # upload from the open file handle.
    files = [("pdf", (pdf_name, f, "application/pdf"))]
    payload = {"fields_schema": json.dumps(class_schema)}
    classification_response = requests.post(
        url, headers=headers, files=files, data=payload
    )
# Surface 4xx/5xx responses as an explicit HTTPError instead of failing later
# while indexing into the parsed body.
classification_response.raise_for_status()
classification = classification_response.json()["data"]["extracted_schema"][
    "document_type"
]
# Define the extraction schema based on the classification result.
# Each branch builds a JSON-Schema-style object whose properties are all
# required; any label other than "Passport" or "Invoice" ends the script.
if classification == "Passport":
    schema = {
        "type": "object",
        "properties": {
            "Given Names": {"type": "string"},
            "Date of birth": {"type": "string"},
            # NOTE(review): declared as a number; confirm that ID values never
            # carry leading zeros or non-digit characters.
            "ID_number": {"type": "number"},
            "Passport Number": {"type": "string"},
        },
        "required": ["Given Names", "Date of birth", "ID_number", "Passport Number"],
    }
elif classification == "Invoice":
    schema = {
        "type": "object",
        "properties": {
            "Bill to": {"type": "string"},
            "Invoice Number": {"type": "string"},
            "Invoice Date": {"type": "string"},
            "Due Date": {"type": "string"},
            "Total": {"type": "string"},
        },
        "required": ["Bill to", "Invoice Number", "Invoice Date", "Due Date", "Total"],
    }
else:
    print("Document type is 'Other'. No extraction schema defined.")
    # Raise SystemExit directly: the bare `exit()` helper is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    # Status 0 matches what `exit()` with no argument produced.
    raise SystemExit(0)
# Second request: extraction.
# Upload the same PDF again, this time with the type-specific schema, and print
# the fields returned under `extracted_schema`.
with open(pdf_path, "rb") as f:
    # The POST is issued inside the `with` block because requests reads the
    # upload from the open file handle.
    files = [("pdf", (pdf_name, f, "application/pdf"))]
    payload = {"fields_schema": json.dumps(schema)}
    response = requests.post(url, headers=headers, files=files, data=payload)
# Surface 4xx/5xx responses as an explicit HTTPError instead of failing later
# while indexing into the parsed body.
response.raise_for_status()
output_data = response.json()["data"]
extracted_info = output_data["extracted_schema"]
print(extracted_info)