Skip to main content
This article is about the legacy ADE endpoint (v1/tools/agentic-document-analysis). Use the current endpoints for all new projects.
When you parse a document with the legacy API, the parsed data is returned in a structured JSON format. The JSON Schema below describes the structure of the response. For full details about the API response, go to the API Reference.
{
  "$defs": {
    "DocumentAnalysisChunk": {
      "description": "An extracted chunk from the document",
      "properties": {
        "text": {
          "description": "A Markdown representation of the chunk (except for tables, which are represented in HTML).",
          "title": "Text",
          "type": "string"
        },
        "grounding": {
          "description": "The specific spatial location(s) of this chunk within the original document. A chunk can have multiple groundings, for example if it is a single paragraph split across two columns.",
          "items": {
            "$ref": "#/$defs/Grounding"
          },
          "title": "Grounding",
          "type": "array"
        },
        "chunk_type": {
          "$ref": "#/$defs/ChunkType",
          "description": "The detected type of the chunk, matching its role within the document."
        },
        "chunk_id": {
          "description": "A UUID for the chunk. This matches UUIDs in the HTML comments in the Markdown output.",
          "title": "Chunk Id",
          "type": "string"
        },
        "rotation_angle": {
          "description": "The rotation angle applied to the chunk (in degrees).",
          "title": "Rotation Angle",
          "type": "number",
          "default": 0
        }
      },
      "required": [
        "text",
        "grounding",
        "chunk_type",
        "chunk_id"
      ],
      "title": "DocumentAnalysisChunk",
      "type": "object"
    },
    "Grounding": {
      "description": "Grounding for a chunk, specifying the location within the original document",
      "properties": {
        "box": {
          "$ref": "#/$defs/GroundingBox",
          "description": "A bounding box (in relative coordinates) establishing the chunk's spatial location within the page."
        },
        "page": {
          "description": "The chunk's 0-indexed page within the original document.",
          "title": "Page",
          "type": "integer"
        }
      },
      "required": [
        "box",
        "page"
      ],
      "title": "Grounding",
      "type": "object"
    },
    "GroundingBox": {
      "description": "Bounding box, expressed in relative coordinates (float from 0 to 1)",
      "properties": {
        "l": {
          "description": "Left coordinate",
          "title": "L",
          "type": "number"
        },
        "t": {
          "description": "Top coordinate",
          "title": "T",
          "type": "number"
        },
        "r": {
          "description": "Right coordinate",
          "title": "R",
          "type": "number"
        },
        "b": {
          "description": "Bottom coordinate",
          "title": "B",
          "type": "number"
        }
      },
      "required": [
        "l",
        "t",
        "r",
        "b"
      ],
      "title": "GroundingBox",
      "type": "object"
    },
    "ChunkType": {
      "description": "Type of the chunk, signifying its role within the document",
      "enum": [
        "logo",
        "card",
        "attestation",
        "scan_code",
        "form",
        "table",
        "figure",
        "text",
        "marginalia",
        "title",
        "page_header",
        "page_footer",
        "page_number",
        "key_value"
      ],
      "title": "ChunkType",
      "type": "string"
    },
    "AgenticDocumentAnalysisBase": {
      "description": "The main data object containing parsed document content",
      "properties": {
        "markdown": {
          "description": "A Markdown representation of the document, potentially with HTML comments at the end of each chunk. Can be a single string or an array of strings (one per page).",
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
              }
            }
          ],
          "title": "Markdown"
        },
        "extracted_schema": {
          "description": "The extracted data based on the fields_schema provided in the request. Only present if fields_schema was provided.",
          "anyOf": [
            {
              "type": "object"
            },
            {
              "type": "null"
            }
          ],
          "title": "Extracted Schema"
        },
        "extraction_metadata": {
          "description": "Metadata about the extracted fields, including chunk references and confidence scores. Only present if fields_schema was provided.",
          "anyOf": [
            {
              "type": "object"
            },
            {
              "type": "null"
            }
          ],
          "title": "Extraction Metadata"
        },
        "chunks": {
          "description": "List of chunks extracted from the document in reading order.",
          "items": {
            "$ref": "#/$defs/DocumentAnalysisChunk"
          },
          "title": "Chunks",
          "type": "array"
        }
      },
      "required": [
        "markdown",
        "chunks"
      ],
      "title": "AgenticDocumentAnalysisBase",
      "type": "object"
    },
    "AgenticDocAnalysisPageError": {
      "description": "Error information for a page that failed to process",
      "properties": {
        "page_num": {
          "description": "The page number that failed (0-indexed)",
          "title": "Page Num",
          "type": "integer"
        },
        "error": {
          "description": "Error message describing what went wrong",
          "title": "Error",
          "type": "string"
        },
        "error_code": {
          "description": "HTTP error code",
          "title": "Error Code",
          "type": "integer"
        }
      },
      "required": [
        "page_num",
        "error",
        "error_code"
      ],
      "title": "AgenticDocAnalysisPageError",
      "type": "object"
    }
  },
  "properties": {
    "data": {
      "$ref": "#/$defs/AgenticDocumentAnalysisBase",
      "description": "The main data object containing the parsed document"
    },
    "errors": {
      "description": "List of errors that occurred during page processing. Pages with errors are excluded from the data object.",
      "items": {
        "$ref": "#/$defs/AgenticDocAnalysisPageError"
      },
      "title": "Errors",
      "type": "array"
    },
    "extraction_error": {
      "description": "Error message if field extraction failed. Only present if fields_schema was provided and extraction encountered an error.",
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "title": "Extraction Error"
    },
    "metadata": {
      "description": "Additional metadata about the processing job",
      "anyOf": [
        {
          "type": "object"
        },
        {
          "type": "null"
        }
      ],
      "title": "Metadata"
    }
  },
  "required": [
    "data"
  ],
  "title": "AgenticDocAnalysisResponse",
  "type": "object"
}

Grounding Information

When the legacy API parses a document, it breaks the content into chunks, which are discrete elements extracted from a document, like blocks of text or tables. Each chunk includes a grounding array, in which each entry represents a location of the chunk in the document. A chunk can have multiple groundings, for example if it is a single paragraph split across two columns. Each grounding includes:
  • the 0-indexed page number that the chunk is on
  • the relative coordinates (from 0 to 1) of the bounding box of the chunk
For example, below is the JSON output for a text chunk from the legacy API. The grounding array contains a single entry: its page value indicates that the text is on the first page (page 0), and its box object gives the bounding box as left (l), top (t), right (r), and bottom (b) coordinates.
{
      "text": "## INSURANCE COMPANY",
      "grounding": [
        {
          "box": {
            "l": 0.35,
            "t": 0.22619999999999998,
            "r": 0.565,
            "b": 0.24033749999999998
          },
          "page": 0
        }
      ],
      "chunk_type": "text",
      "chunk_id": "9475461e-0686-4b16-b503-ccec7d7f115c"
    }