Learn how to configure tasks in Chunkr AI
Tasks are configured by passing a `Configuration` object. All configurations can be used together.
# Combined example: several options set on a single Configuration.
from chunkr_ai.models import (
    ChunkProcessing,
    Configuration,
    OcrStrategy,
)

config = Configuration(
    chunk_processing=ChunkProcessing(target_length=1024),
    expires_in=3600,
    ocr_strategy=OcrStrategy.AUTO,
)

# NOTE(review): `chunkr` is not created in this snippet; it is assumed to be
# an already-initialised client object -- confirm against the setup docs.
task = chunkr.upload("path/to/your/file", config)
# Chunk processing only: sets the target chunk length to 1024.
# Configuration is imported alongside ChunkProcessing so this snippet runs
# on its own when copied out of the page.
from chunkr_ai.models import ChunkProcessing, Configuration

config = Configuration(
    chunk_processing=ChunkProcessing(
        target_length=1024,
    ),
)
# Expiration only. The import is included so the snippet is self-contained.
# NOTE(review): expires_in is presumably expressed in seconds -- confirm.
from chunkr_ai.models import Configuration

config = Configuration(expires_in=3600)
# OCR strategy only. The import is included so the snippet is self-contained.
from chunkr_ai.models import Configuration, OcrStrategy

config = Configuration(ocr_strategy=OcrStrategy.AUTO)  # or OcrStrategy.ALL
# Segment processing: one GenerationConfig per segment type.
from chunkr_ai.models import (
    Configuration,
    CroppingStrategy,
    GenerationConfig,
    GenerationStrategy,
    SegmentProcessing,
    SegmentFormat,
)

# Every segment type is spelled out explicitly. Most use Markdown output with
# the AUTO strategy; the exceptions are called out inline below.
# NOTE(review): these values presumably mirror the library defaults -- confirm.
config = Configuration(
    segment_processing=SegmentProcessing(
        Caption=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.AUTO,
            description=False,
        ),
        # Formula uses the LLM strategy instead of AUTO.
        Formula=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.LLM,
            description=False,
        ),
        Footnote=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.AUTO,
            description=False,
        ),
        ListItem=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.AUTO,
            description=False,
        ),
        Page=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.AUTO,
            description=False,
        ),
        # Page footers and headers use the IGNORE strategy.
        PageFooter=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.IGNORE,
            description=False,
        ),
        PageHeader=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.IGNORE,
            description=False,
        ),
        # Picture is the only segment type here using CroppingStrategy.ALL.
        Picture=GenerationConfig(
            crop_image=CroppingStrategy.ALL,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.AUTO,
            description=False,
        ),
        SectionHeader=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.AUTO,
            description=False,
        ),
        # Table uses HTML output, the LLM strategy, and description=True.
        Table=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.HTML,
            strategy=GenerationStrategy.LLM,
            description=True,
        ),
        Text=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.AUTO,
            description=False,
        ),
        Title=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.MARKDOWN,
            strategy=GenerationStrategy.AUTO,
            description=False,
        ),
    ),
)
# Table-only override: HTML output, the LLM strategy, and description=True.
# No other segment type is passed to SegmentProcessing here.
config = Configuration(
    segment_processing=SegmentProcessing(
        Table=GenerationConfig(
            crop_image=CroppingStrategy.AUTO,
            format=SegmentFormat.HTML,
            strategy=GenerationStrategy.LLM,
            description=True,
        ),
    ),
)
# Segmentation strategy only. SegmentationStrategy is imported explicitly so
# the snippet is self-contained and does not raise NameError.
from chunkr_ai.models import Configuration, SegmentationStrategy

config = Configuration(
    segmentation_strategy=SegmentationStrategy.LAYOUT_ANALYSIS,  # or SegmentationStrategy.PAGE
)
Was this page helpful?