> ## Documentation Index
> Fetch the complete documentation index at: https://docs.chunkr.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# List Tasks

> Lists tasks for the authenticated user with cursor-based pagination
and optional filtering by date range. Supports ascending or descending
sort order and optional inclusion of chunks/base64 URLs.


## OpenAPI

````yaml https://api.chunkr.ai/docs/openapi.json get /tasks
openapi: 3.1.0
info:
  title: Chunkr API
  description: >-
    API service for document layout analysis and chunking to convert documents
    into RAG/LLM-ready data.
  contact:
    name: Chunkr
    url: https://chunkr.ai
    email: ishaan@lumina.sh
  license:
    name: ''
  version: 2.62.0
servers:
  - url: https://api.chunkr.ai
    description: Chunkr API
security: []
tags:
  - name: Files
    description: Endpoints for uploading and managing files
  - name: Health
    description: Endpoint for checking the health of the service.
  - name: Tasks
    description: Endpoints for uploading and managing tasks
  - name: Webhook
    description: Endpoints for managing webhooks
paths:
  /tasks:
    get:
      tags:
        - Tasks
      summary: List Tasks
      description: |-
        Lists tasks for the authenticated user with cursor-based pagination
        and optional filtering by date range. Supports ascending or descending
        sort order and optional inclusion of chunks/base64 URLs.
      operationId: list_tasks_route
      parameters:
        - name: base64_urls
          in: query
          description: >-
            Whether to return base64 encoded URLs. If false, the URLs will be
            returned as presigned URLs.
          required: false
          schema:
            type: boolean
        - name: end
          in: query
          description: End date
          required: false
          schema:
            type: string
            format: date-time
        - name: include_chunks
          in: query
          description: Whether to include chunks in the output response
          required: false
          schema:
            type: boolean
        - name: limit
          in: query
          description: Number of tasks per page
          required: false
          schema:
            type: integer
            format: int64
        - name: cursor
          in: query
          description: Cursor for pagination (timestamp)
          required: false
          schema:
            type: string
            format: date-time
        - name: start
          in: query
          description: Start date
          required: false
          schema:
            type: string
            format: date-time
        - name: sort
          in: query
          description: 'Sort order: ''asc'' for ascending, ''desc'' for descending (default)'
          required: false
          schema:
            $ref: '#/components/schemas/SortOrder'
        - name: task_types
          in: query
          description: Filter by one or more task types
          required: false
          schema:
            type: array
            items:
              $ref: '#/components/schemas/TaskType'
        - name: statuses
          in: query
          description: Filter by one or more statuses
          required: false
          schema:
            type: array
            items:
              $ref: '#/components/schemas/Status'
      responses:
        '200':
          description: Paginated list of tasks
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TasksResponse'
        '401':
          description: Unauthorized
          content:
            text/plain:
              schema:
                type: string
        '500':
          description: Server error
          content:
            text/plain:
              schema:
                type: string
      security:
        - api_key: []
components:
  schemas:
    SortOrder:
      type: string
      enum:
        - asc
        - desc
    TaskType:
      type: string
      enum:
        - Parse
        - Extract
    Status:
      type: string
      description: The status of the task.
      enum:
        - Starting
        - Processing
        - Succeeded
        - Failed
        - Cancelled
    TasksResponse:
      type: object
      required:
        - tasks
        - has_more
      properties:
        has_more:
          type: boolean
        next_cursor:
          type:
            - string
            - 'null'
          format: date-time
        tasks:
          type: array
          items:
            $ref: '#/components/schemas/TaskResponse'
    TaskResponse:
      type: object
      required:
        - configuration
        - completed
        - created_at
        - file_info
        - message
        - status
        - task_type
        - task_id
        - version_info
      properties:
        completed:
          type: boolean
          description: >-
            True when the task reaches a terminal state, i.e. `status` is
            `Succeeded`, `Failed`, or `Cancelled`.
        configuration:
          $ref: '#/components/schemas/TaskConfiguration'
        created_at:
          type: string
          format: date-time
          description: The date and time when the task was created and queued.
        expires_at:
          type:
            - string
            - 'null'
          format: date-time
          description: The date and time when the task will expire.
        file_info:
          $ref: '#/components/schemas/FileInfo'
        finished_at:
          type:
            - string
            - 'null'
          format: date-time
          description: The date and time when the task was finished.
        input_file_url:
          type:
            - string
            - 'null'
          description: |-
            The presigned URL of the input file.
            Deprecated: use `file_info.url` instead.
          deprecated: true
        message:
          type: string
          description: A message describing the task's status or any errors that occurred.
        output:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/TaskOutput'
        parse_task_id:
          type:
            - string
            - 'null'
          description: The ID of the source `parse` task that was used for the task
        started_at:
          type:
            - string
            - 'null'
          format: date-time
          description: The date and time when the task was started.
        status:
          $ref: '#/components/schemas/Status'
        task_id:
          type: string
          description: The unique identifier for the task.
        task_type:
          $ref: '#/components/schemas/TaskType'
        task_url:
          type:
            - string
            - 'null'
          description: The presigned URL of the task.
        version_info:
          $ref: '#/components/schemas/VersionInfo'
    TaskConfiguration:
      oneOf:
        - $ref: '#/components/schemas/ParseConfiguration'
          description: Configuration for a parsing task
        - $ref: '#/components/schemas/ExtractConfiguration'
          description: Configuration for an extraction task
      description: >-
        Unified configuration type that can represent either parse or extract
        configurations
    FileInfo:
      type: object
      description: Information about the input file.
      required:
        - url
      properties:
        mime_type:
          type:
            - string
            - 'null'
          description: The MIME type of the file.
        name:
          type:
            - string
            - 'null'
          description: The name of the file.
        page_count:
          type:
            - integer
            - 'null'
          format: int32
          description: The number of pages in the file.
          minimum: 0
        ss_cell_count:
          type:
            - integer
            - 'null'
          format: int32
          description: The number of cells in the file. Only used for spreadsheets.
          minimum: 0
        url:
          type: string
          description: The presigned URL/Base64 encoded URL of the input file.
    TaskOutput:
      oneOf:
        - $ref: '#/components/schemas/ParseOutputResponse'
          description: Output from a parsing task
        - $ref: '#/components/schemas/ExtractOutputResponse'
          description: Output from an extraction task
      description: Unified output type that can represent either parse or extract results
    VersionInfo:
      type: object
      description: Version information for the task.
      required:
        - server_version
        - client_version
      properties:
        client_version:
          $ref: '#/components/schemas/ClientVersion'
          description: The version of the client.
        server_version:
          type: string
          description: The version of the server.
    ParseConfiguration:
      type: object
      title: Parse
      properties:
        chunk_processing:
          oneOf:
            - $ref: '#/components/schemas/ChunkProcessing'
          default:
            ignore_headers_and_footers: null
            target_length: 4096
            tokenizer:
              Enum: Word
        error_handling:
          oneOf:
            - $ref: '#/components/schemas/ErrorHandlingStrategy'
          default: Fail
        ocr_strategy:
          oneOf:
            - $ref: '#/components/schemas/OcrStrategy'
          default: All
        pipeline:
          oneOf:
            - $ref: '#/components/schemas/PipelineType'
          default: Chunkr
        segment_processing:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/SegmentProcessing'
          default: null
        segmentation_strategy:
          oneOf:
            - $ref: '#/components/schemas/SegmentationStrategy'
          default: LayoutAnalysis
    ExtractConfiguration:
      type: object
      title: Extract
      required:
        - schema
      properties:
        parse_configuration:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/ParseConfiguration'
              description: |-
                Optional configuration for the `parse` task.
                Cannot be used if `file` is a `task_id`.
        schema:
          type: object
          description: The schema to be used for the extraction.
        system_prompt:
          type:
            - string
            - 'null'
          description: The system prompt to be used for the extraction.
          default: >-
            You are an expert at structured data extraction. You will be given
            parsed text from a document and should convert it into the given
            structure.
    ParseOutputResponse:
      type: object
      title: Parse
      description: The processed results of a document parsing task
      required:
        - chunks
      properties:
        chunks:
          type: array
          items:
            $ref: '#/components/schemas/Chunk'
          description: >-
            Collection of document chunks, where each chunk contains one or more
            segments
        file_name:
          type:
            - string
            - 'null'
          description: 'The name of the file. Deprecated: use `file_info.name` instead.'
          deprecated: true
        mime_type:
          type:
            - string
            - 'null'
          description: >-
            The MIME type of the file. Deprecated: use `file_info.mime_type`
            instead.
          deprecated: true
        page_count:
          type:
            - integer
            - 'null'
          format: int32
          description: >-
            The number of pages in the file. Deprecated: use
            `file_info.page_count` instead.
          deprecated: true
          minimum: 0
        pages:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/Page'
          description: >-
            The pages of the file. Includes the image and metadata for each
            page.
        pdf_url:
          type:
            - string
            - 'null'
          description: The presigned URL of the PDF file.
    ExtractOutputResponse:
      type: object
      title: Extract
      description: >-
        The processed results of a document extraction task.


        Shapes:

        - `results`: JSON matching the user-provided schema.

        - `citations`: mirror of `results`; only leaf positions (primitive or
        array-of-primitives) contain a `Vec<Citation>` supporting that field.

        - `metrics`: mirror of `results`; only leaf positions contain a
        `Metrics` object for that field.
      required:
        - results
        - citations
        - metrics
      properties:
        citations:
          description: >-
            Mirror of `results`; leaves are `Vec<Citation>` for the
            corresponding field


            Example:


            ```json

            {
              "field_name": [
                {
                  "citation_id": "abc1234",
                  "citation_type": "Segment",
                  "bboxes": [
                    {
                      "left": 10,
                      "top": 20,
                      "width": 100,
                      "height": 18
                    }
                  ],
                  "content": "Example content",
                  "segment_id": "seg_001",
                  "segment_type": "Text",
                  "page_number": 1,
                  "page_height": 297,
                  "page_width": 210,
                  "ss_ranges": ["A1:C10"],
                  "ss_sheet_name": "Sheet1"
                }
              ]
            }

            ```
        metrics:
          description: >-
            Mirror of `results`; leaves contain a `Metrics` object for the
            corresponding field


            Example:


            ```json

            { "field_name": { "confidence": "High" } }

            ```
        results:
          description: |-
            JSON data that matches the provided schema

            Example:

            ```json
            { "field_name": "value" }
            ```
    ClientVersion:
      oneOf:
        - type: string
          title: Legacy
          description: Legacy SDK without version information (< 0.3.3)
          enum:
            - Legacy
        - type: object
          title: ManualSdk
          description: Version of the current manually-maintained SDK
          required:
            - ManualSdk
          properties:
            ManualSdk:
              type: string
              description: Version of the current manually-maintained SDK
        - type: object
          title: GeneratedSdk
          description: Version of the auto-generated SDK
          required:
            - GeneratedSdk
          properties:
            GeneratedSdk:
              type: string
              description: Version of the auto-generated SDK
        - type: string
          title: Unspecified
          description: Unspecified/raw API request without any client version headers
          enum:
            - Unspecified
      description: Represents different types of SDK clients and their versions
    ChunkProcessing:
      type: object
      description: Controls the settings for chunking and post-processing of each chunk.
      properties:
        ignore_headers_and_footers:
          type:
            - boolean
            - 'null'
          description: 'DEPRECATED: use `segment_processing.ignore` instead'
          deprecated: true
        target_length:
          type: integer
          format: int32
          description: >-
            The target number of words in each chunk. If 0, each chunk will
            contain a single segment.
          default: 4096
          minimum: 0
        tokenizer:
          oneOf:
            - $ref: '#/components/schemas/TokenizerType'
              description: The tokenizer to use for the chunking process.
          default: Word
    ErrorHandlingStrategy:
      type: string
      description: >-
        Controls how errors are handled during processing:

        - `Fail`: Stops processing and fails the task when any error occurs

        - `Continue`: Attempts to continue processing despite non-critical
        errors (e.g., LLM refusals).
      enum:
        - Fail
        - Continue
    OcrStrategy:
      type: string
      description: >-
        Controls the Optical Character Recognition (OCR) strategy.

        - `All`: Processes all pages with OCR. (Latency penalty: ~0.5 seconds
        per page)

        - `Auto`: Selectively applies OCR only to pages with missing or
        low-quality text. When a text layer is present, the bounding boxes from the
        text layer are used.
      enum:
        - All
        - Auto
    PipelineType:
      type: string
      enum:
        - Azure
        - Chunkr
      deprecated: true
    SegmentProcessing:
      type: object
      description: >-
        Configuration for how each document segment is processed and formatted.


        Each segment has sensible defaults, but you can override specific
        settings:

        - `format`: Output as `Html` or `Markdown`

        - `strategy`: `Auto` (rule-based), `LLM` (AI-generated), or `Ignore`
        (skip)

        - `crop_image`: Whether to crop images to segment bounds

        - `extended_context`: Use full page as context for LLM processing

        - `description`: Generate descriptions for segments


        **Defaults per segment type:** Check the documentation for more details.


        Only specify the fields you want to change - everything else uses the
        defaults.
      properties:
        Caption:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        Footnote:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        FormRegion:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
              description: >-
                New segment types - must be Optional for backwards
                compatibility.
          default: null
        Formula:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        GraphicalItem:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        Legend:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        LineNumber:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        ListItem:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        Page:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        PageFooter:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        PageHeader:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        PageNumber:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        Picture:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        Table:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        Text:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        Title:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
        Unknown:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationConfig'
          default: null
    SegmentationStrategy:
      type: string
      description: >-
        Controls the segmentation strategy:

        - `LayoutAnalysis`: Analyzes pages for layout elements (e.g., `Table`,
        `Picture`, `Formula`, etc.) using bounding boxes. Provides fine-grained
        segmentation and better chunking.

        - `Page`: Treats each page as a single segment. Faster processing, but
        without layout element detection and only simple chunking.
      enum:
        - LayoutAnalysis
        - Page
    Chunk:
      type: object
      required:
        - chunk_length
        - segments
      properties:
        chunk_id:
          type: string
          description: The unique identifier for the chunk.
        chunk_length:
          type: integer
          format: int32
          description: >-
            The total number of tokens in the `embed` field of the chunk.
            Calculated by the `tokenizer`.
          minimum: 0
        content:
          type:
            - string
            - 'null'
          description: >-
            The content of the chunk. This is the text that is generated by
            combining the `content` field from each segment.

            Can be provided as context to the LLM.
        embed:
          type:
            - string
            - 'null'
          description: >-
            Suggested text to be embedded for the chunk. This text is generated
            by combining the `embed` field from each segment.
        segments:
          type: array
          items:
            $ref: '#/components/schemas/Segment'
          description: >-
            Collection of document segments that form this chunk.

            When `target_length` > 0, contains the maximum number of
            segments

            that fit within that length (segments remain intact).

            Otherwise, contains exactly one segment.
    Page:
      type: object
      required:
        - image
        - page_number
        - page_height
        - page_width
      properties:
        dpi:
          type:
            - number
            - 'null'
          format: float
          description: DPI of the page/sheet. All cropped images are scaled to this DPI.
        image:
          type: string
          description: The presigned URL of the page/sheet image.
        page_height:
          type: number
          format: float
          description: The height of the page/sheet.
        page_number:
          type: integer
          format: int32
          description: The page number/sheet number.
          minimum: 0
        page_width:
          type: number
          format: float
          description: The width of the page/sheet.
        ss_sheet_name:
          type:
            - string
            - 'null'
          description: >-
            The name of the sheet containing the page. Only used for
            Spreadsheets.
    TokenizerType:
      oneOf:
        - type: object
          title: Enum
          description: Use one of the predefined tokenizer types
          required:
            - Enum
          properties:
            Enum:
              $ref: '#/components/schemas/Tokenizer'
              description: Use one of the predefined tokenizer types
        - type: object
          title: String
          description: |-
            Use any Hugging Face tokenizer by specifying its model ID
            Examples: "Qwen/Qwen-tokenizer", "facebook/bart-large"
          required:
            - String
          properties:
            String:
              type: string
              description: |-
                Use any Hugging Face tokenizer by specifying its model ID
                Examples: "Qwen/Qwen-tokenizer", "facebook/bart-large"
      description: >-
        Specifies which tokenizer to use for the chunking process.


        This type supports two ways of specifying a tokenizer:

        1. Using a predefined tokenizer from the `Tokenizer` enum

        2. Using any Hugging Face tokenizer by providing its model ID as a
        string
           (e.g. "facebook/bart-large", "Qwen/Qwen-tokenizer", etc.)

        When using a string, any valid Hugging Face tokenizer ID can be
        specified,

        which will be loaded using the Hugging Face tokenizers library.
    GenerationConfig:
      type: object
      description: >-
        Controls the processing and generation for the segment.

        - `crop_image` controls whether to crop the file's images to the
        segment's bounding box.
          The cropped image will be stored in the segment's `image` field. Use `All` to always crop,
          or `Auto` to only crop when needed for post-processing.
        - `format` specifies the output format: `Html` or `Markdown`

        - `strategy` determines how the content is generated: `Auto`, `LLM`, or
        `Ignore`
          - `Auto`: Process content automatically
          - `LLM`: Use large language models for processing
          - `Ignore`: Exclude segments from final output
        - `description` enables LLM-generated descriptions for segments.
          **Note:** This uses chunkr's own VLM models and is not configurable via LLM processing configuration.
        - `extended_context` uses the full page image as context for LLM
        generation.
      properties:
        crop_image:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/CroppingStrategy'
          default: null
        description:
          type:
            - boolean
            - 'null'
          description: Generate LLM descriptions for this segment
          default: null
        extended_context:
          type:
            - boolean
            - 'null'
          description: Use the full page image as context for LLM generation
          default: null
        format:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/SegmentFormat'
          default: null
        llm:
          type:
            - string
            - 'null'
          deprecated: true
        strategy:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/GenerationStrategy'
          default: null
    Segment:
      type: object
      required:
        - bbox
        - page_height
        - page_width
        - page_number
        - segment_id
        - segment_type
      properties:
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          type:
            - number
            - 'null'
          format: float
          description: Confidence score of the layout analysis model
        content:
          type: string
          description: >-
            Content of the segment, will be either HTML or Markdown, depending
            on format chosen.
        description:
          type:
            - string
            - 'null'
          description: Description of the segment, generated by the LLM.
        embed:
          type:
            - string
            - 'null'
          description: Embeddable content of the segment.
        image:
          type:
            - string
            - 'null'
          description: Presigned URL to the image of the segment.
        llm:
          type:
            - string
            - 'null'
          description: LLM representation of the segment.
          deprecated: true
        ocr:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/OCRResult'
          description: OCR results for the segment.
        page_height:
          type: number
          format: float
          description: Height of the page/sheet containing the segment.
        page_number:
          type: integer
          format: int32
          description: Page number/Sheet number of the segment.
          minimum: 0
        page_width:
          type: number
          format: float
          description: Width of the page/sheet containing the segment.
        segment_id:
          type: string
          description: Unique identifier for the segment.
        segment_length:
          type:
            - integer
            - 'null'
          format: int32
          description: Length of the segment in tokens.
          minimum: 0
        segment_type:
          $ref: '#/components/schemas/SegmentType'
        ss_cells:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/Cell'
          description: Cells of the segment. Only used for Spreadsheets.
        ss_header_bbox:
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/BoundingBox'
              description: >-
                Bounding box of the header of the segment, if found. Only used
                for Spreadsheets.
        ss_header_ocr:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/OCRResult'
          description: >-
            OCR results of the header of the segment, if found. Only used for
            Spreadsheets.
        ss_header_range:
          type:
            - string
            - 'null'
          description: >-
            Header range of the segment, if found.

            The header can overlap with `segment.ss_range` if the table
            contains the header;

            if the header is located in a different sheet, the header range will
            have no overlap with `segment.ss_range`.

            Only used for Spreadsheets.
        ss_header_text:
          type:
            - string
            - 'null'
          description: >-
            Text content of the header of the segment, if found. Only used for
            Spreadsheets.
        ss_range:
          type:
            - string
            - 'null'
          description: >-
            Range of the segment in Excel notation (e.g., A1:B5). Only used for
            Spreadsheets.
        ss_sheet_name:
          type:
            - string
            - 'null'
          description: >-
            Name of the sheet containing the segment. Only used for
            Spreadsheets.
        text:
          type: string
          description: Text content of the segment. Calculated from the OCR results.
    Tokenizer:
      type: string
      description: >-
        Common tokenizers used for text processing.


        These values represent standard tokenization approaches and popular
        pre-trained

        tokenizers from the Hugging Face ecosystem.
      enum:
        - Word
        - Cl100kBase
        - XlmRobertaBase
        - BertBaseUncased
    CroppingStrategy:
      type: string
      description: |-
        Controls the cropping strategy for an item (e.g. segment, chunk, etc.)
        - `All` crops all images in the item
        - `Auto` crops images only if required for post-processing
      enum:
        - All
        - Auto
    SegmentFormat:
      type: string
      description: The format for the `content` field of a segment.
      enum:
        - Html
        - Markdown
    GenerationStrategy:
      type: string
      description: The strategy for generating the `content` field of a segment.
      enum:
        - LLM
        - Auto
        - Ignore
    # Box described by its `left`/`top` coordinates plus `width` and `height`;
    # all four fields are required and typed as float numbers.
    BoundingBox:
      description: >-
        Bounding box for an item.
        It is used for segments and OCR results.
      type: object
      required: [left, top, width, height]
      properties:
        height:
          description: The height of the bounding box.
          type: number
          format: float
        left:
          description: The left coordinate of the bounding box.
          type: number
          format: float
        top:
          description: The top coordinate of the bounding box.
          type: number
          format: float
        width:
          description: The width of the bounding box.
          type: number
          format: float
    # A piece of recognized text together with its bounding box.
    # Only `bbox` and `text` are required; `confidence` and `ss_cell_ref`
    # additionally accept null.
    OCRResult:
      description: OCR results for a segment
      type: object
      required: [bbox, text]
      properties:
        bbox:
          $ref: '#/components/schemas/BoundingBox'
        confidence:
          description: The confidence score of the recognized text.
          type: [number, 'null']
          format: float
        ocr_id:
          description: The unique identifier for the OCR result.
          type: string
        ss_cell_ref:
          description: >-
            Excel-style cell reference (e.g., "A1" or "A1:B2")
            when OCR originates from a spreadsheet cell
          type: [string, 'null']
        text:
          description: The recognized text of the OCR result.
          type: string
    # Closed vocabulary of segment labels, serialized as strings.
    # The value order below is kept exactly as published.
    SegmentType:
      description: All the possible types for a segment.
      type: string
      enum:
        - Caption
        - Footnote
        - Formula
        - FormRegion
        - GraphicalItem
        - Legend
        - LineNumber
        - ListItem
        - Page
        - PageFooter
        - PageHeader
        - PageNumber
        - Picture
        - Table
        - Text
        - Title
        - Unknown
        - SectionHeader
    Cell:
      type: object
      description: >-
        A single cell, identified by `cell_id` and `range`, with its text
        content and optional formula, computed value, hyperlink, and style.
      required:
        - cell_id
        - text
        - range
      properties:
        cell_id:
          type: string
          description: The cell ID.
        formula:
          type:
            - string
            - 'null'
          description: Formula of the cell.
        hyperlink:
          type:
            - string
            - 'null'
          description: >-
            Hyperlink URL if the cell contains a link (e.g.,
            "https://www.chunkr.ai").
        range:
          type: string
          description: Range of the cell.
        style:
          # Nullable reference: the value is either null or a CellStyle object.
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/CellStyle'
              description: >-
                Styling information for the cell including colors, fonts, and
                formatting.
        text:
          type: string
          description: Text content of the cell.
        value:
          type:
            - string
            - 'null'
          description: >-
            The computed/evaluated value of the cell. This represents the actual
            result after evaluating any formulas,

            as opposed to the raw text content. For cells with formulas, this is
            the calculated result;

            for cells with static content, this is typically the same as the
            text field.


            Example: text might show "3.14" (formatted to 2 decimal places)
            while value could be "3.141592653589793" (full precision).
    CellStyle:
      type: object
      description: >-
        Styling information for a cell, including colors, fonts, and
        formatting. Every property is optional and nullable.
      properties:
        align:
          # Nullable reference: the value is either null or an Alignment value.
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/Alignment'
              description: Alignment of the cell content.
        bg_color:
          type:
            - string
            - 'null'
          description: Background color of the cell (e.g., "#FFFFFF" or "#DAE3F3").
        font_face:
          type:
            - string
            - 'null'
          description: Font face/family of the cell (e.g., "Arial", "Daytona").
        is_bold:
          type:
            - boolean
            - 'null'
          description: Whether the cell content is bold.
        text_color:
          type:
            - string
            - 'null'
          description: Text color of the cell (e.g., "#000000" or "red").
        valign:
          # Nullable reference: either null or a VerticalAlignment value.
          oneOf:
            - type: 'null'
            - $ref: '#/components/schemas/VerticalAlignment'
              description: Vertical alignment of the cell content.
    Alignment:
      type: string
      description: Horizontal alignment of cell content.
      enum:
        - Left
        - Center
        - Right
        - Justify
    VerticalAlignment:
      type: string
      description: Vertical alignment of cell content.
      enum:
        - Top
        - Middle
        - Bottom
        - Baseline
  securitySchemes:
    # API-key scheme: the key is carried in the `Authorization` request header.
    api_key:
      name: Authorization
      in: header
      type: apiKey

````