- Introduction of dynamic Processors - Introduction of caching system - Introduction of a better template manager - Adaptation of ModelVariables to support dynamic Processors / Retrievers / Specialists - Start adaptation of chat client
93 lines
3.4 KiB
Python
93 lines
3.4 KiB
Python
from typing import Dict, Type, Optional
|
|
from flask import current_app
|
|
from config.processor_types import PROCESSOR_TYPES
|
|
from .base_processor import BaseProcessor
|
|
|
|
|
|
class ProcessorRegistry:
    """Registry for processor types that aligns with PROCESSOR_TYPES configuration.

    Maps processor type identifiers (the keys of ``PROCESSOR_TYPES``) to the
    ``BaseProcessor`` classes that implement them. The mapping is stored on
    the class itself and is only accessed through classmethods, so every
    caller shares the same registry.
    """

    # processor type identifier -> implementation class
    _registry: Dict[str, Type[BaseProcessor]] = {}

    @classmethod
    def register(cls, processor_type: str, processor_class: Type[BaseProcessor]):
        """
        Register a new processor type that must match a type in PROCESSOR_TYPES

        Registering the same processor_type again replaces the previously
        registered class.

        Args:
            processor_type: Type identifier from PROCESSOR_TYPES
            processor_class: Processor implementation class

        Raises:
            ValueError: If processor_type isn't defined in PROCESSOR_TYPES
        """
        if processor_type not in PROCESSOR_TYPES:
            raise ValueError(f"Processor type {processor_type} not found in PROCESSOR_TYPES configuration")

        cls._registry[processor_type] = processor_class

    @classmethod
    def get_processor_class(cls, processor_type: str) -> Type[BaseProcessor]:
        """
        Get the processor class for a given processor type

        Args:
            processor_type: Type identifier from PROCESSOR_TYPES

        Returns:
            The registered processor class

        Raises:
            ValueError: If no processor is registered for the given type
        """
        try:
            return cls._registry[processor_type]
        except KeyError:
            # Callers are promised a ValueError; the KeyError adds no information.
            raise ValueError(f"No processor registered for type: {processor_type}") from None

    @staticmethod
    def _supported_file_types(config):
        """Return the file types declared by one PROCESSOR_TYPES entry.

        'file_types' may be a comma-separated string (split and stripped here)
        or an already-built collection (returned as is). A missing or None
        value is treated as "handles no file types" so that one incomplete
        entry does not break lookups for every other processor type.
        """
        try:
            declared = config['file_types']
        except KeyError:
            return []

        if declared is None:
            return []
        if isinstance(declared, str):
            return [part.strip() for part in declared.split(',')]
        return declared

    @classmethod
    def get_processor_for_file_type(cls, file_type: str) -> tuple[str, Type[BaseProcessor]]:
        """
        Find appropriate processor for a file type by checking PROCESSOR_TYPES definitions

        PROCESSOR_TYPES is scanned in its iteration order and the first
        processor type that both supports file_type and has a registered
        class wins. A supporting type without a registered class does not
        stop the scan; it is only reported if no registered match exists.

        Args:
            file_type: File extension (e.g., 'html', 'pdf')

        Returns:
            Tuple of (processor_type, processor_class)

        Raises:
            ValueError: If no processor is found for the file type
        """
        # First supporting-but-unregistered type, kept for the error message.
        unregistered_match: Optional[str] = None

        for proc_type, config in PROCESSOR_TYPES.items():
            if file_type not in cls._supported_file_types(config):
                continue

            if proc_type in cls._registry:
                return proc_type, cls._registry[proc_type]

            if unregistered_match is None:
                unregistered_match = proc_type

        if unregistered_match is not None:
            raise ValueError(
                f"Found processor type {unregistered_match} for file type {file_type} but no processor is registered")

        raise ValueError(f"No processor type found for file type: {file_type}")

    @classmethod
    def validate_processor_registration(cls):
        """
        Validate that all PROCESSOR_TYPES have registered processors

        Raises:
            ValueError: If any processor type lacks a registered processor
        """
        missing_processors = [
            proc_type for proc_type in PROCESSOR_TYPES if proc_type not in cls._registry
        ]

        if missing_processors:
            raise ValueError(f"Missing processor registrations for: {', '.join(missing_processors)}")
|