{
"$defs": {
"Chunk": {
"description": "An extracted chunk from the document",
"properties": {
"text": {
"description": "A Markdown representation of the chunk (except for tables, which are represented in HTML).",
"title": "Text",
"type": "string"
},
"grounding": {
"description": "The specific spatial location(s) of this chunk within the original document. A chunk can have multiple groundings, for example if it is single paragraph split across two columns.",
"items": {
"$ref": "#/$defs/ChunkGrounding"
},
"title": "Grounding",
"type": "array"
},
"chunk_type": {
"$ref": "#/$defs/ChunkType",
"description": "The detected type of the chunk, matching its role within the document."
},
"chunk_id": {
"description": "A UUID for the chunk. This matches UUIDs in the HTML comments in the Markdown output.",
"title": "Chunk Id",
"type": "string"
}
},
"required": [
"text",
"grounding",
"chunk_type",
"chunk_id"
],
"title": "Chunk",
"type": "object"
},
"ChunkGrounding": {
"description": "Grounding for a chunk, specifying the location within the original document",
"properties": {
"box": {
"$ref": "#/$defs/ChunkGroundingBox",
"description": "A bounding box (in relative coordinates) establishing the chunk's spatial location within the page."
},
"page": {
"description": "The chunk's 0-indexed page within the original document.",
"title": "Page",
"type": "integer"
}
},
"required": [
"box",
"page"
],
"title": "ChunkGrounding",
"type": "object"
},
"ChunkGroundingBox": {
"description": "Bounding box, expressed in relative coordinates (float from 0 to 1)",
"properties": {
"l": {
"title": "L",
"type": "number"
},
"t": {
"title": "T",
"type": "number"
},
"r": {
"title": "R",
"type": "number"
},
"b": {
"title": "B",
"type": "number"
}
},
"required": [
"l",
"t",
"r",
"b"
],
"title": "ChunkGroundingBox",
"type": "object"
},
"ChunkType": {
"description": "Type of the chunk, signifying its role within the document",
"enum": [
"title",
"page_header",
"page_footer",
"page_number",
"key_value",
"form",
"table",
"figure",
"text"
],
"title": "ChunkType",
"type": "string"
}
},
"properties": {
"markdown": {
"description": "A Markdown representation of the document, potentially with HTML comments at the end of the each chunk. You can use this as context to an LLM.",
"title": "Markdown",
"type": "string"
},
"chunks": {
"description": "List of chunks extracted from the document in reading order.",
"items": {
"$ref": "#/$defs/Chunk"
},
"title": "Chunks",
"type": "array"
}
},
"required": [
"markdown",
"chunks"
],
"title": "APIResponse",
"type": "object"
}