Bulk Upload
Learn how to efficiently process multiple files with Chunkr AI
This guide uses Chunkr AI’s async capabilities to process a whole directory of files concurrently instead of one at a time.
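For a single document, the async flow is just an upload followed by a save. Here is a minimal sketch using only the `Chunkr` client, `upload`, and `json` calls that also appear in the full script below (the file names are placeholders):

```python
import asyncio

from chunkr_ai import Chunkr

chunkr = Chunkr()

async def main():
    # Upload one file and wait for Chunkr to finish processing it
    result = await chunkr.upload("document.pdf")  # placeholder path

    # Write the structured output to disk as JSON
    result.json("document.json")  # placeholder path

asyncio.run(main())
```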
Process a Directory
Here’s a simple script to process all files in a directory:
```python
import asyncio
import os
from pathlib import Path

from chunkr_ai import Chunkr

chunkr = Chunkr()

async def process_directory(input_dir: str, output_dir: str):
    try:
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Get all files in directory
        files = list(Path(input_dir).glob('*.*'))
        print(f"Found {len(files)} files to process")

        # Process files concurrently
        tasks = []
        for file_path in files:
            task = asyncio.create_task(process_file(chunkr, file_path, output_dir))
            tasks.append(task)

        # Wait for all files to complete
        results = await asyncio.gather(*tasks)
        print(f"Completed processing {len(results)} files")
    except Exception as e:
        print(f"Error processing directory: {e}")

async def process_file(chunkr, file_path, output_dir):
    try:
        # Upload file
        result = await chunkr.upload(file_path)

        # Check if upload was successful
        if result.status == "Failed":
            print(f"Failed to process file {file_path}: {result.message}")
            return None

        # Save result
        file_name = file_path.name
        output_file_path = Path(output_dir) / f"{file_name}.json"
        result.json(output_file_path)
        return file_name
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None

# Run the processor
if __name__ == "__main__":
    INPUT_DIR = "/data/Chunkr/dataset/files"
    OUTPUT_DIR = "processed/"
    asyncio.run(process_directory(INPUT_DIR, OUTPUT_DIR))
```
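`asyncio.gather` launches every upload at once, which can overwhelm your network or run into API rate limits on large directories. One common way to cap the number of in-flight uploads is an `asyncio.Semaphore`. The sketch below layers this on top of the `process_file` helper above; the limit of 10 is an arbitrary example value, not a documented Chunkr setting:

```python
async def process_directory_bounded(input_dir: str, output_dir: str,
                                    max_concurrent: int = 10):
    os.makedirs(output_dir, exist_ok=True)
    files = list(Path(input_dir).glob('*.*'))

    # Allow at most max_concurrent uploads to run at the same time
    semaphore = asyncio.Semaphore(max_concurrent)

    async def bounded(file_path):
        async with semaphore:
            return await process_file(chunkr, file_path, output_dir)

    results = await asyncio.gather(*(bounded(f) for f in files))

    # process_file returns None on failure, so non-None results are successes
    succeeded = [r for r in results if r is not None]
    print(f"Completed {len(succeeded)} of {len(files)} files")
```

Because `process_file` already catches its own exceptions and returns `None` on failure, counting the non-`None` results gives a quick success tally without any extra error handling.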