Learn how to efficiently process multiple files with Chunkr AI
import asyncio
from chunkr_ai import Chunkr
import os
from pathlib import Path
# Single client instance, handed to process_file() for every upload.
# NOTE(review): Chunkr() is called with no arguments, so credentials
# presumably come from the environment — confirm against the chunkr_ai docs.
chunkr = Chunkr()
async def process_directory(input_dir: str, output_dir: str) -> None:
    """Upload every file in ``input_dir`` to Chunkr concurrently.

    Each file is handed to ``process_file`` together with the module-level
    ``chunkr`` client, and all uploads are awaited at once. Progress and
    errors are reported with ``print``; nothing is returned and no
    exception propagates to the caller.

    Args:
        input_dir: Directory whose direct children are processed. Only
            regular files matching ``*.*`` are picked up; sub-directories
            are not searched.
        output_dir: Directory for the results; created if it is missing.
    """
    try:
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # '*.*' also matches directories with a dot in their name, which
        # cannot be uploaded, so keep regular files only.
        files = [path for path in Path(input_dir).glob('*.*') if path.is_file()]
        print(f"Found {len(files)} files to process")

        # gather() schedules one task per coroutine and waits for all of
        # them; process_file reports its own errors and returns None on
        # failure, so one bad file does not abort the batch.
        results = await asyncio.gather(
            *(process_file(chunkr, path, output_dir) for path in files)
        )

        # Only non-None results are successes; report failures honestly
        # instead of counting every attempted file as completed.
        succeeded = [name for name in results if name is not None]
        print(f"Completed processing {len(succeeded)} of {len(results)} files")
    except Exception as e:
        print(f"Error processing directory: {e}")
async def process_file(chunkr, file_path, output_dir):
    """Upload one file to Chunkr and save the result as JSON.

    Args:
        chunkr: Client object exposing an awaitable ``upload(path)``.
        file_path: File to upload, given as a ``str`` or ``pathlib.Path``.
        output_dir: Directory the result is written into, as
            ``<output_dir>/<file name>.json``. The original extension is
            kept, e.g. ``report.pdf`` becomes ``report.pdf.json``.

    Returns:
        The file's name on success, or ``None`` when the result's status
        is ``"Failed"`` or any exception was raised. Problems are printed,
        never re-raised.
    """
    try:
        # Accept plain string paths too; `.name` below needs a Path.
        file_path = Path(file_path)

        # Upload file
        result = await chunkr.upload(file_path)

        # Check if upload was successful
        if result.status == "Failed":
            print(f"Failed to process file {file_path}: {result.message}")
            return None

        # Save result
        file_name = file_path.name
        output_file_path = Path(output_dir) / f"{file_name}.json"
        result.json(output_file_path)
        return file_name
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return None
# Script entry point: process one hard-coded input directory and write the
# JSON results into a local "processed/" folder.
if __name__ == "__main__":
    INPUT_DIR = "/data/Chunkr/dataset/files"
    OUTPUT_DIR = "processed/"
    # asyncio.run() starts an event loop, runs the coroutine to completion,
    # and closes the loop again.
    asyncio.run(process_directory(input_dir=INPUT_DIR, output_dir=OUTPUT_DIR))
Was this page helpful?