from typing import Dict, Type

from config.type_defs.processor_types import PROCESSOR_TYPES

from .base_processor import BaseProcessor


class ProcessorRegistry:
    """Registry that maps processor type identifiers to processor classes.

    Only identifiers that are keys of ``PROCESSOR_TYPES`` can be registered.
    The mapping lives in a class-level dict and is accessed exclusively
    through the classmethods below.
    """

    # Processor type identifier -> processor implementation class.
    _registry: Dict[str, Type[BaseProcessor]] = {}

    @classmethod
    def register(cls, processor_type: str, processor_class: Type[BaseProcessor]):
        """Register ``processor_class`` under ``processor_type``.

        Registering an identifier that is already present replaces the
        previously stored class.

        Args:
            processor_type: Type identifier; must be a key of PROCESSOR_TYPES.
            processor_class: Processor implementation class.

        Raises:
            ValueError: If processor_type isn't defined in PROCESSOR_TYPES.
        """
        if processor_type not in PROCESSOR_TYPES:
            raise ValueError(
                f"Processor type {processor_type} not found in PROCESSOR_TYPES configuration")
        cls._registry[processor_type] = processor_class

    @classmethod
    def get_processor_class(cls, processor_type: str) -> Type[BaseProcessor]:
        """Return the processor class registered for ``processor_type``.

        Args:
            processor_type: Type identifier used at registration time.

        Returns:
            The registered processor class.

        Raises:
            ValueError: If no processor is registered for the given type.
        """
        try:
            return cls._registry[processor_type]
        except KeyError:
            # Callers are promised ValueError; hide the internal KeyError.
            raise ValueError(
                f"No processor registered for type: {processor_type}") from None

    @classmethod
    def get_processor_for_file_type(cls, file_type: str) -> tuple[str, Type[BaseProcessor]]:
        """Find a registered processor whose configuration lists ``file_type``.

        PROCESSOR_TYPES entries are scanned in iteration order. The first
        entry that both supports ``file_type`` and has a registered class
        wins. Entries that support the file type but have no registered
        class are skipped, so a later registered entry can still be found.

        Args:
            file_type: File extension (e.g., 'html', 'pdf'). Compared as-is
                against the configured values (exact string match).

        Returns:
            Tuple of (processor_type, processor_class).

        Raises:
            ValueError: If no processor type lists the file type, or if every
                processor type that lists it lacks a registered class.
        """
        first_unregistered = None

        for proc_type, config in PROCESSOR_TYPES.items():
            supported_types = config['file_types']
            # 'file_types' may be a comma-separated string or an iterable.
            if isinstance(supported_types, str):
                supported_types = [t.strip() for t in supported_types.split(',')]

            if file_type not in supported_types:
                continue

            if proc_type in cls._registry:
                return proc_type, cls._registry[proc_type]

            # Remember the first unregistered match for the error message,
            # but keep looking: another type may handle this file type too.
            if first_unregistered is None:
                first_unregistered = proc_type

        if first_unregistered is not None:
            raise ValueError(
                f"Found processor type {first_unregistered} for file type {file_type} but no processor is registered")

        raise ValueError(f"No processor type found for file type: {file_type}")

    @classmethod
    def validate_processor_registration(cls):
        """Check that every key of PROCESSOR_TYPES has a registered processor.

        Raises:
            ValueError: If any processor type lacks a registered processor;
                the message lists all missing types.
        """
        missing_processors = [
            proc_type for proc_type in PROCESSOR_TYPES
            if proc_type not in cls._registry
        ]
        if missing_processors:
            raise ValueError(
                f"Missing processor registrations for: {', '.join(missing_processors)}")