Developer Quickstart

Follow these steps to set up your account and integrate with our API.

Visit Chunkr AI
Click on “Login” and create your account
Once logged in, navigate to “API Keys” in the dashboard and copy your API key. The examples below read it from the CHUNKR_API_KEY environment variable.

Install our client SDK

The Python SDK is currently in alpha. The --pre flag is required to install pre-release versions.

pip install --pre chunkr-ai

Parse a document

import os
import time

from chunkr_ai import Chunkr
from pydantic import BaseModel

# Initialize the client. The API key is read from the CHUNKR_API_KEY
# environment variable; os.environ[...] raises KeyError if it is not set.
client = Chunkr(api_key=os.environ["CHUNKR_API_KEY"])

# Create a parse task using a file URL
parse_task = client.tasks.parse.create(
    file="https://s3.us-east-1.amazonaws.com/chunkr-web/uploads/invoice.pdf"
)

# Alternatively, upload a local file first
# with open('path/to/doc.pdf', 'rb') as f:
#     uploaded = client.files.create(file=f)
# parse_task = client.tasks.parse.create(file=uploaded.url)

# Wait for parse task to complete.
# Sleep *before* each poll: the loop then exits right after the poll that
# reports completion instead of waiting another 3 seconds for nothing.
while not parse_task.completed:
    time.sleep(3)
    parse_task = client.tasks.parse.get(task_id=parse_task.task_id)
    print(f"Parse Status: {parse_task.status}")

if parse_task.status == "Succeeded":
    # Do something with the output
    pass
else:  # Could be "Failed" or "Cancelled"
    print(f"Parse Status: {parse_task.status}")

Extract structured data

# Fields to extract from the invoice. The JSON schema generated from this
# model is passed to the extract task, and the extract results are later
# validated against the same model.
class Invoice(BaseModel):
    invoice_number: str
    invoice_date: str
    total_amount: float

# Use the parse task ID to create an extract task
extract_task = client.tasks.extract.create(
    file=parse_task.task_id,
    schema=Invoice.model_json_schema(),  # Convert Pydantic model to JSON schema
)

# Wait for extract task to complete.
# Sleep *before* each poll: the loop then exits right after the poll that
# reports completion instead of waiting another 3 seconds for nothing.
while not extract_task.completed:
    time.sleep(3)
    extract_task = client.tasks.extract.get(task_id=extract_task.task_id)
    print(f"Extract Status: {extract_task.status}")

Explore Output

# Preview the parse results: the first 200 characters of each of the
# first 5 chunks (chunks without content are skipped)
parsed = parse_task.output
if parsed is not None:
    for chunk in parsed.chunks[:5]:
        text = chunk.content
        if text is not None:
            print(text[:200])

# Show the extract results, but only for a succeeded task that has output
if extract_task.status == "Succeeded":
    extracted = extract_task.output
    if extracted is not None:
        # Validate the results against the schema
        invoice = Invoice.model_validate(extracted.results)
        # Do something with the invoice
        print(invoice)

You can also explore the output in more detail through our web interface.

Full example (copy-paste)

import os
import time

from chunkr_ai import Chunkr
from pydantic import BaseModel

# Seconds to wait between two status polls of a running task
POLL_INTERVAL_SECONDS = 3

# Initialize the client. The API key is read from the CHUNKR_API_KEY
# environment variable; os.environ[...] raises KeyError if it is not set.
client = Chunkr(api_key=os.environ["CHUNKR_API_KEY"])

# Create a parse task using a file URL
parse_task = client.tasks.parse.create(
    file="https://s3.us-east-1.amazonaws.com/chunkr-web/uploads/invoice.pdf"
)

# Alternatively, upload a local file first
# with open('path/to/doc.pdf', 'rb') as f:
#     uploaded = client.files.create(file=f)
# parse_task = client.tasks.parse.create(file=uploaded.url)

# Wait for parse task to complete.
# Sleep *before* each poll: the loop then exits right after the poll that
# reports completion instead of waiting one more interval for nothing.
while not parse_task.completed:
    time.sleep(POLL_INTERVAL_SECONDS)
    parse_task = client.tasks.parse.get(task_id=parse_task.task_id)
    print(f"Parse Status: {parse_task.status}")

if parse_task.status == "Succeeded":
    # Do something with the output
    pass
else:  # Could be "Failed" or "Cancelled"
    print(f"Parse Status: {parse_task.status}")


# Fields to extract from the invoice. The JSON schema generated from this
# model is passed to the extract task, and the extract results are later
# validated against the same model.
class Invoice(BaseModel):
    invoice_number: str
    invoice_date: str
    total_amount: float


# Use the parse task ID to create an extract task
extract_task = client.tasks.extract.create(
    file=parse_task.task_id,
    schema=Invoice.model_json_schema(),  # Convert Pydantic model to JSON schema
)

# Wait for extract task to complete (same sleep-then-poll order as above)
while not extract_task.completed:
    time.sleep(POLL_INTERVAL_SECONDS)
    extract_task = client.tasks.extract.get(task_id=extract_task.task_id)
    print(f"Extract Status: {extract_task.status}")

# Get parse results and print the first 200 characters of the first 5 chunks
if parse_task.output is not None:
    for chunk in parse_task.output.chunks[:5]:
        if chunk.content is not None:
            print(chunk.content[:200])

# Get extract results, validate them against the schema and print them
if extract_task.status == "Succeeded" and extract_task.output is not None:
    invoice = Invoice.model_validate(extract_task.output.results)
    # Do something with the invoice
    print(invoice)

Next Steps

Get to production with our task system and webhooks.

Task Handling

Learn how to handle tasks in production.

Webhooks

Receive real-time notifications.

Get Started

Task System

Features

Security

Developer Quickstart

Next Steps

Task Handling

Webhooks

Get Started

Task System

Features

Security

Next Steps

Task Handling

Webhooks

Next Steps