Chunkr AI allows you to configure tasks with a Configuration object. All configurations can be used together.

from chunkr_ai.models import ChunkProcessing, Configuration, OcrStrategy

# A single Configuration object can carry several options at once.
config = Configuration(
    chunk_processing=ChunkProcessing(
        target_length=1024,
    ),
    expires_in=3600,
    high_resolution=True,
    ocr_strategy=OcrStrategy.AUTO,
)

# Pass the configuration as the second argument when uploading a file.
# NOTE: `chunkr` is assumed to be a client instance created earlier.
task = chunkr.upload(
    "path/to/your/file",
    config,
)

Available Configuration Examples

Chunk Processing

from chunkr_ai.models import ChunkProcessing, Configuration

# Chunking options are grouped in a ChunkProcessing object, which is passed
# to Configuration through the chunk_processing keyword.
config = Configuration(
    chunk_processing=ChunkProcessing(
        ignore_headers_and_footers=True,
        target_length=1024,
    ),
)

Expires In

# Only expires_in is set here; no other option is passed.
config = Configuration(
    expires_in=3600,
)

High Resolution

# Only high_resolution is set here; no other option is passed.
config = Configuration(
    high_resolution=True,
)

OCR Strategy

# OcrStrategy.ALL may be used here instead of OcrStrategy.AUTO.
config = Configuration(
    ocr_strategy=OcrStrategy.AUTO,
)

Segment Processing

This example showcases all the options for segment processing. It matches the default configuration, which is applied if nothing is specified. For your own configuration, set only the options you want to change; the rest keep their default values.

from chunkr_ai.models import (
    Configuration,
    CroppingStrategy,
    GenerationConfig,
    GenerationStrategy,
    SegmentProcessing,
)

# One GenerationConfig per segment type. In this listing, Formula and Table
# use GenerationStrategy.LLM for both html and markdown, Picture uses
# CroppingStrategy.ALL, and every other value is AUTO. No segment type has
# an LLM prompt (llm=None).
config = Configuration(
    segment_processing=SegmentProcessing(
        Caption=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        Formula=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.LLM,
            markdown=GenerationStrategy.LLM,
            llm=None,
        ),
        Footnote=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        ListItem=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        Page=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        PageFooter=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        PageHeader=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        Picture=GenerationConfig(
            crop_image=CroppingStrategy.ALL,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        SectionHeader=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        Table=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.LLM,
            markdown=GenerationStrategy.LLM,
            llm=None,
        ),
        Text=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
        Title=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.AUTO,
            markdown=GenerationStrategy.AUTO,
            llm=None,
        ),
    ),
)

You can customize any segment’s generation strategy and add optional LLM prompts:

# Example with custom LLM prompt for tables.
# Only the Table segment type is passed here; the llm argument carries the
# prompt text.
config = Configuration(
    segment_processing=SegmentProcessing(
        Table=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            html=GenerationStrategy.LLM,
            markdown=GenerationStrategy.LLM,
            llm="Convert this table to a clear and concise format",
        ),
    ),
)

Segmentation Strategy

from chunkr_ai.models import Configuration, SegmentationStrategy

# SegmentationStrategy.PAGE may be used here instead of
# SegmentationStrategy.LAYOUT_ANALYSIS.
config = Configuration(
    segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,
)