from pathlib import Path
from landingai_ade import LandingAIADE
from PIL import Image, ImageDraw
import pymupdf
# RGB colors used to outline and label each grounding type.
# Types missing from this table are drawn in gray by draw_bounding_boxes.
CHUNK_TYPE_COLORS = {
    "chunkText": (40, 167, 69),          # green
    "chunkTable": (0, 123, 255),         # blue
    "chunkMarginalia": (111, 66, 193),   # purple
    "chunkFigure": (255, 0, 255),        # magenta
    "chunkLogo": (144, 238, 144),        # light green
    "chunkCard": (255, 165, 0),          # orange
    "chunkAttestation": (0, 255, 255),   # cyan
    "chunkScanCode": (255, 193, 7),      # yellow
    "chunkForm": (220, 20, 60),          # red
    "tableCell": (173, 216, 230),        # light blue
    "table": (70, 130, 180),             # steel blue
}
def draw_bounding_boxes(parse_response, document_path):
    """Draw a labeled bounding box around each grounding and save PNG images.

    A document whose suffix is ``.pdf`` (case-insensitive) is rendered page by
    page at 2x scale, and each page is saved as ``page_<n>_annotated.png``
    (``n`` is 1-based).  Any other file is opened as a single image and saved
    as ``page_annotated.png``.  Output paths are relative, so files land in
    the current working directory.

    Args:
        parse_response: Parse result exposing a ``grounding`` mapping of
            id -> grounding.  Each grounding is read for its ``page``,
            ``type`` and ``box`` (``left``/``top``/``right``/``bottom``)
            attributes.
        document_path: Location of the document that was parsed; a ``Path``
            or a plain string.

    Returns:
        None.  The annotated images are written to disk and each saved path
        is printed.
    """
    def create_annotated_image(image, groundings, page_num=0):
        """Return a copy of *image* with the boxes for *page_num* drawn on it."""
        annotated_img = image.copy()
        draw = ImageDraw.Draw(annotated_img)
        img_width, img_height = image.size

        for gid, grounding in groundings.items():
            # Skip groundings that belong to another page (relevant for PDFs)
            if grounding.page != page_num:
                continue

            box = grounding.box
            # Box edges are scaled by the image size to get pixel coordinates
            x1 = int(box.left * img_width)
            y1 = int(box.top * img_height)
            x2 = int(box.right * img_width)
            y2 = int(box.bottom * img_height)

            # Unknown grounding types fall back to gray
            color = CHUNK_TYPE_COLORS.get(grounding.type, (128, 128, 128))
            draw.rectangle([x1, y1, x2, y2], outline=color, width=3)

            # Label sits just above the box, clamped so it stays on the image;
            # its width is a rough estimate of 8 pixels per character.
            label = f"{grounding.type}:{gid}"
            label_y = max(0, y1 - 20)
            draw.rectangle([x1, label_y, x1 + len(label) * 8, y1], fill=color)
            draw.text((x1 + 2, label_y + 2), label, fill=(255, 255, 255))

        return annotated_img

    # Accept plain strings as well as Path objects
    document_path = Path(document_path)

    if document_path.suffix.lower() == '.pdf':
        # The context manager closes the PDF even if rendering or saving fails
        with pymupdf.open(document_path) as pdf:
            for page_num, page in enumerate(pdf):
                pix = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))  # 2x scaling
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

                # Create and save annotated image
                annotated_img = create_annotated_image(
                    img, parse_response.grounding, page_num
                )
                annotated_path = f"page_{page_num + 1}_annotated.png"
                annotated_img.save(annotated_path)
                print(f"Annotated image saved to: {annotated_path}")
    else:
        # Load the image file directly; the context manager releases the file
        # handle once the annotated copy has been made.
        with Image.open(document_path) as source_img:
            img = source_img if source_img.mode == "RGB" else source_img.convert("RGB")
            annotated_img = create_annotated_image(img, parse_response.grounding)

        annotated_path = "page_annotated.png"
        annotated_img.save(annotated_path)
        print(f"Annotated image saved to: {annotated_path}")

    return None
# Create the API client; the key is taken from the VISION_AGENT_API_KEY environment variable
client = LandingAIADE()

# Point this at the document you want to process
document_path = Path("/path/to/file/document")

# Run the parse request
print("Parsing document...")
parse_response = client.parse(document=document_path, model="dpt-2-latest")
print("Parsing complete!")

# Render the grounding boxes and write the annotated image(s)
draw_bounding_boxes(parse_response, document_path)