diff --git a/common/utils/cache/config_cache.py b/common/utils/cache/config_cache.py index f923c00..a1c959f 100644 --- a/common/utils/cache/config_cache.py +++ b/common/utils/cache/config_cache.py @@ -7,7 +7,7 @@ from flask import current_app from common.utils.cache.base import CacheHandler, CacheKey from config.type_defs import agent_types, task_types, tool_types, specialist_types, retriever_types, prompt_types, \ - catalog_types, partner_service_types + catalog_types, partner_service_types, processor_types def is_major_minor(version: str) -> bool: @@ -59,7 +59,7 @@ class BaseConfigCacheHandler(CacheHandler[Dict[str, Any]]): """Set the version tree cache dependency.""" self.version_tree_cache = cache - def _load_specific_config(self, type_name: str, version_str: str) -> Dict[str, Any]: + def _load_specific_config(self, type_name: str, version_str: str = 'latest') -> Dict[str, Any]: """ Load a specific configuration version Automatically handles global vs partner-specific configs @@ -456,6 +456,13 @@ CatalogConfigCacheHandler, CatalogConfigVersionTreeCacheHandler, CatalogConfigTy types_module=catalog_types.CATALOG_TYPES )) +ProcessorConfigCacheHandler, ProcessorConfigVersionTreeCacheHandler, ProcessorConfigTypesCacheHandler = ( + create_config_cache_handlers( + config_type='processors', + config_dir='config/processors', + types_module=processor_types.PROCESSOR_TYPES + )) + # Add to common/utils/cache/config_cache.py PartnerServiceConfigCacheHandler, PartnerServiceConfigVersionTreeCacheHandler, PartnerServiceConfigTypesCacheHandler = ( create_config_cache_handlers( @@ -487,6 +494,9 @@ def register_config_cache_handlers(cache_manager) -> None: cache_manager.register_handler(CatalogConfigCacheHandler, 'eveai_config') cache_manager.register_handler(CatalogConfigTypesCacheHandler, 'eveai_config') cache_manager.register_handler(CatalogConfigVersionTreeCacheHandler, 'eveai_config') + cache_manager.register_handler(ProcessorConfigCacheHandler, 'eveai_config') + cache_manager.register_handler(ProcessorConfigTypesCacheHandler, 'eveai_config') + cache_manager.register_handler(ProcessorConfigVersionTreeCacheHandler, 'eveai_config') cache_manager.register_handler(AgentConfigCacheHandler, 'eveai_config') cache_manager.register_handler(AgentConfigTypesCacheHandler, 'eveai_config') cache_manager.register_handler(AgentConfigVersionTreeCacheHandler, 'eveai_config') @@ -500,4 +510,6 @@ def register_config_cache_handlers(cache_manager) -> None: cache_manager.specialists_config_cache.set_version_tree_cache(cache_manager.specialists_version_tree_cache) cache_manager.retrievers_config_cache.set_version_tree_cache(cache_manager.retrievers_version_tree_cache) cache_manager.prompts_config_cache.set_version_tree_cache(cache_manager.prompts_version_tree_cache) + cache_manager.catalogs_config_cache.set_version_tree_cache(cache_manager.catalogs_version_tree_cache) + cache_manager.processors_config_cache.set_version_tree_cache(cache_manager.processors_version_tree_cache) cache_manager.partner_services_config_cache.set_version_tree_cache(cache_manager.partner_services_version_tree_cache) diff --git a/config/catalogs/globals/DOSSIER_CATALOG/1.0.0.yaml b/config/catalogs/globals/DOSSIER_CATALOG/1.0.0.yaml new file mode 100644 index 0000000..80b6048 --- /dev/null +++ b/config/catalogs/globals/DOSSIER_CATALOG/1.0.0.yaml @@ -0,0 +1,21 @@ +version: "1.0.0" +name: "Dossier Catalog" +description: "A Catalog with information in Evie's Library in which several Dossiers can be stored" +configuration: + tagging_fields: + name: "Tagging Fields" + type: "tagging_fields" + description: "Define the metadata fields that will be used for tagging documents. + Each field must have: + - type: one of 'string', 'integer', 'float', 'date', 'enum' + - required: boolean indicating if the field is mandatory + - description: field description + - allowed_values: list of values (for enum type only) + - min_value/max_value: range limits (for numeric types only)" + required: true + default: {} +document_version_configurations: ["tagging_fields"] +metadata: + author: "System" + date_added: "2023-01-01" + description: "A Catalog with information in Evie's Library in which several Dossiers can be stored" \ No newline at end of file diff --git a/config/catalogs/globals/STANDARD_CATALOG/1.0.0.yaml b/config/catalogs/globals/STANDARD_CATALOG/1.0.0.yaml new file mode 100644 index 0000000..ab708e0 --- /dev/null +++ b/config/catalogs/globals/STANDARD_CATALOG/1.0.0.yaml @@ -0,0 +1,9 @@ +version: "1.0.0" +name: "Standard Catalog" +description: "A Catalog with information in Evie's Library, to be considered as a whole" +configuration: {} +document_version_configurations: [] +metadata: + author: "System" + date_added: "2023-01-01" + description: "A Catalog with information in Evie's Library, to be considered as a whole" \ No newline at end of file diff --git a/config/processors/globals/AUDIO_PROCESSOR/1.0.0.yaml b/config/processors/globals/AUDIO_PROCESSOR/1.0.0.yaml new file mode 100644 index 0000000..fa854b3 --- /dev/null +++ b/config/processors/globals/AUDIO_PROCESSOR/1.0.0.yaml @@ -0,0 +1,9 @@ +version: "1.0.0" +name: "AUDIO Processor" +file_types: "mp3, mp4, ogg" +description: "A Processor for audio files" +configuration: {} +metadata: + author: "System" + date_added: "2023-01-01" + description: "A Processor for audio files" \ No newline at end of file diff --git a/config/processors/globals/DOCX_PROCESSOR/1.0.0.yaml b/config/processors/globals/DOCX_PROCESSOR/1.0.0.yaml new file mode 100644 index 0000000..3426a64 --- /dev/null +++ b/config/processors/globals/DOCX_PROCESSOR/1.0.0.yaml @@ -0,0 +1,59 @@ +version: "1.0.0" +name: "DOCX Processor" +file_types: "docx" +description: "A processor for DOCX files" +configuration: + chunking_patterns: + name: "Chunking Patterns" + description: "A list of Patterns used to chunk files into logical pieces" + type: "chunking_patterns" + required: false + chunking_heading_level: + name: "Chunking Heading Level" + type: "integer" + description: "Maximum heading level to consider for chunking (1-6)" + required: false + default: 2 + extract_comments: + name: "Extract Comments" + type: "boolean" + description: "Whether to include document comments in the markdown" + required: false + default: false + extract_headers_footers: + name: "Extract Headers/Footers" + type: "boolean" + description: "Whether to include headers and footers in the markdown" + required: false + default: false + preserve_formatting: + name: "Preserve Formatting" + type: "boolean" + description: "Whether to preserve bold, italic, and other text formatting" + required: false + default: true + list_style: + name: "List Style" + type: "enum" + description: "How to format lists in markdown" + required: false + default: "dash" + allowed_values: ["dash", "asterisk", "plus"] + image_handling: + name: "Image Handling" + type: "enum" + description: "How to handle embedded images" + required: false + default: "skip" + allowed_values: ["skip", "extract", "placeholder"] + table_alignment: + name: "Table Alignment" + type: "enum" + description: "How to align table contents" + required: false + default: "left" + allowed_values: ["left", "center", "preserve"] +metadata: + author: "System" + date_added: "2023-01-01" + description: "A processor for DOCX files" \ No newline at end of file diff --git a/config/processors/globals/HTML_PROCESSOR/1.0.0.yaml b/config/processors/globals/HTML_PROCESSOR/1.0.0.yaml new file mode 100644 index 0000000..6b09452 --- /dev/null +++ b/config/processors/globals/HTML_PROCESSOR/1.0.0.yaml @@ -0,0 +1,49 @@ +version: "1.0.0" +name: "HTML Processor" +file_types: "html" +description: "A processor for HTML files" +configuration: + chunking_patterns: + name: "Chunking Patterns" + description: "A list of Patterns used to chunk files into logical pieces" + type: "chunking_patterns" + required: false + chunking_heading_level: + name: "Chunking Heading Level" + type: "integer" + description: "Maximum heading level to consider for chunking (1-6)" + required: false + default: 2 + html_tags: + name: "HTML Tags" + type: "string" + description: "A comma-separated list of HTML tags" + required: true + default: "p, h1, h2, h3, h4, h5, h6, li, table, thead, tbody, tr, td" + html_end_tags: + name: "HTML End Tags" + type: "string" + description: "A comma-separated list of HTML end tags (where can the chunk end)" + required: true + default: "p, li, table" + html_included_elements: + name: "HTML Included Elements" + type: "string" + description: "A comma-separated list of elements to be included" + required: true + default: "article, main" + html_excluded_elements: + name: "HTML Excluded Elements" + type: "string" + description: "A comma-separated list of elements to be excluded" + required: false + default: "header, footer, nav, script" + html_excluded_classes: + name: "HTML Excluded Classes" + type: "string" + description: "A comma-separated list of classes to be excluded" + required: false +metadata: + author: "System" + date_added: "2023-01-01" + description: "A processor for HTML files" \ No newline at end of file diff --git a/config/processors/globals/MARKDOWN_PROCESSOR/1.0.0.yaml b/config/processors/globals/MARKDOWN_PROCESSOR/1.0.0.yaml new file mode 100644 index 0000000..189dc3e --- /dev/null +++ b/config/processors/globals/MARKDOWN_PROCESSOR/1.0.0.yaml @@ -0,0 +1,20 @@ +version: "1.0.0" +name: "Markdown Processor" +file_types: "md" +description: "A Processor for markdown files" +configuration: + chunking_patterns: + name: "Chunking Patterns" + description: "A list of Patterns used to chunk files into logical pieces" + type: "chunking_patterns" + required: false + chunking_heading_level: + name: "Chunking Heading Level" + type: "integer" + description: "Maximum heading level to consider for chunking (1-6)" + required: false + default: 2 +metadata: + author: "System" + date_added: "2023-01-01" + description: "A Processor for markdown files" \ No newline at end of file diff --git a/config/processors/globals/PDF_PROCESSOR/1.0.0.yaml b/config/processors/globals/PDF_PROCESSOR/1.0.0.yaml new file mode 100644 index 0000000..a03bb18 --- /dev/null +++ b/config/processors/globals/PDF_PROCESSOR/1.0.0.yaml @@ -0,0 +1,20 @@ +version: "1.0.0" +name: "PDF Processor" +file_types: "pdf" +description: "A Processor for PDF files" +configuration: + chunking_patterns: + name: "Chunking Patterns" + description: "A list of Patterns used to chunk files into logical pieces" + type: "chunking_patterns" + required: false + chunking_heading_level: + name: "Chunking Heading Level" + type: "integer" + description: "Maximum heading level to consider for chunking (1-6)" + required: false + default: 2 +metadata: + author: "System" + date_added: "2023-01-01" + description: "A Processor for PDF files" \ No newline at end of file diff --git a/config/type_defs/catalog_types.py b/config/type_defs/catalog_types.py index 15c4b5d..df15c56 100644 --- a/config/type_defs/catalog_types.py +++ b/config/type_defs/catalog_types.py @@ -2,28 +2,10 @@ CATALOG_TYPES = { "STANDARD_CATALOG": { "name": "Standard Catalog", - "Description": "A Catalog with information in Evie's Library, to be considered as a whole", - "configuration": {}, - "document_version_configurations": [] + "description": "A Catalog with information in Evie's Library, to be considered as a whole", }, "DOSSIER_CATALOG": { "name": "Dossier Catalog", - "Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored", - "configuration": { - "tagging_fields": { - "name": "Tagging Fields", - "type": "tagging_fields", - "description": """Define the metadata fields that will be used for tagging documents. - Each field must have: - - type: one of 'string', 'integer', 'float', 'date', 'enum' - - required: boolean indicating if the field is mandatory - - description: field description - - allowed_values: list of values (for enum type only) - - min_value/max_value: range limits (for numeric types only)""", - "required": True, - "default": {}, - } - }, - "document_version_configurations": ["tagging_fields"] + "description": "A Catalog with information in Evie's Library in which several Dossiers can be stored", }, } diff --git a/config/type_defs/processor_types.py b/config/type_defs/processor_types.py index 37973ee..4ac0eac 100644 --- a/config/type_defs/processor_types.py +++ b/config/type_defs/processor_types.py @@ -1,168 +1,28 @@ -# Catalog Types +# Processor Types PROCESSOR_TYPES = { "HTML_PROCESSOR": { "name": "HTML Processor", + "description": "A processor for HTML files", "file_types": "html", - "Description": "A processor for HTML files", - "configuration": { - "chunking_patterns": { - "name": "Chunking Patterns", - "description": "A list of Patterns used to chunk files into logical pieces", - "type": "chunking_patterns", - "required": False - }, - "chunking_heading_level": { - "name": "Chunking Heading Level", - "type": "integer", - "description": "Maximum heading level to consider for chunking (1-6)", - "required": False, - "default": 2 - }, - "html_tags": { - "name": "HTML Tags", - "type": "string", - "description": "A comma-separated list of HTML tags", - "required": True, - "default": "p, h1, h2, h3, h4, h5, h6, li, table, thead, tbody, tr, td" - }, - "html_end_tags": { - "name": "HTML End Tags", - "type": "string", - "description": "A comma-separated list of HTML end tags (where can the chunk end)", - "required": True, - "default": "p, li, table" - }, - "html_included_elements": { - "name": "HTML Included Elements", - "type": "string", - "description": "A comma-separated list of elements to be included", - "required": True, - "default": "article, main" - }, - "html_excluded_elements": { - "name": "HTML Excluded Elements", - "type": "string", - "description": "A comma-separated list of elements to be excluded", - "required": False, - "default": "header, footer, nav, script" - }, - "html_excluded_classes": { - "name": "HTML Excluded Classes", - "type": "string", - "description": "A comma-separated list of classes to be excluded", - "required": False, - }, - }, }, "PDF_PROCESSOR": { "name": "PDF Processor", + "description": "A Processor for PDF files", "file_types": "pdf", - "Description": "A Processor for PDF files", - "configuration": { - "chunking_patterns": { - "name": "Chunking Patterns", - "description": "A list of Patterns used to chunk files into logical pieces", - "type": "chunking_patterns", - "required": False - }, - "chunking_heading_level": { - "name": "Chunking Heading Level", - "type": "integer", - "description": "Maximum heading level to consider for chunking (1-6)", - "required": False, - "default": 2 - }, - }, }, "AUDIO_PROCESSOR": { "name": "AUDIO Processor", + "description": "A Processor for audio files", "file_types": "mp3, mp4, ogg", - "Description": "A Processor for audio files", - "configuration": {} }, "MARKDOWN_PROCESSOR": { "name": "Markdown Processor", + "description": "A Processor for markdown files", "file_types": "md", - "Description": "A Processor for markdown files", - "configuration": { - "chunking_patterns": { - "name": "Chunking Patterns", - "description": "A list of Patterns used to chunk files into logical pieces", - "type": "chunking_patterns", - "required": False - }, - "chunking_heading_level": { - "name": "Chunking Heading Level", - "type": "integer", - "description": "Maximum heading level to consider for chunking (1-6)", - "required": False, - "default": 2 - }, - } }, "DOCX_PROCESSOR": { "name": "DOCX Processor", + "description": "A processor for DOCX files", "file_types": "docx", - "Description": "A processor for DOCX files", - "configuration": { - "chunking_patterns": { - "name": "Chunking Patterns", - "description": "A list of Patterns used to chunk files into logical pieces", - "type": "chunking_patterns", - "required": False - }, - "chunking_heading_level": { - "name": "Chunking Heading Level", - "type": "integer", - "description": "Maximum heading level to consider for chunking (1-6)", - "required": False, - "default": 2 - }, - "extract_comments": { - "name": "Extract Comments", - "type": "boolean", - "description": "Whether to include document comments in the markdown", - "required": False, - "default": False - }, - "extract_headers_footers": { - "name": "Extract Headers/Footers", - "type": "boolean", - "description": "Whether to include headers and footers in the markdown", - "required": False, - "default": False - }, - "preserve_formatting": { - "name": "Preserve Formatting", - "type": "boolean", - "description": "Whether to preserve bold, italic, and other text formatting", - "required": False, - "default": True - }, - "list_style": { - "name": "List Style", - "type": "enum", - "description": "How to format lists in markdown", - "required": False, - "default": "dash", - "allowed_values": ["dash", "asterisk", "plus"] - }, - "image_handling": { - "name": "Image Handling", - "type": "enum", - "description": "How to handle embedded images", - "required": False, - "default": "skip", - "allowed_values": ["skip", "extract", "placeholder"] - }, - "table_alignment": { - "name": "Table Alignment", - "type": "enum", - "description": "How to align table contents", - "required": False, - "default": "left", - "allowed_values": ["left", "center", "preserve"] - } - } } } diff --git a/config/type_defs/specialist_types.py b/config/type_defs/specialist_types.py index 5c4d39d..7fc3ca7 100644 --- a/config/type_defs/specialist_types.py +++ b/config/type_defs/specialist_types.py @@ -16,5 +16,9 @@ SPECIALIST_TYPES = { "name": "Traicie Role Definition Specialist", "description": "Assistant Defining Competencies and KO Criteria", "partner": "traicie" + }, + "TRAICIE_SELECTION_SPECIALIST": { + "name": "Traicie Selection Specialist", + "description": "Recruitment Selection Assistant", } } \ No newline at end of file diff --git a/eveai_app/templates/base.html b/eveai_app/templates/base.html index 6e587b1..789fc38 100644 --- a/eveai_app/templates/base.html +++ b/eveai_app/templates/base.html @@ -54,6 +54,7 @@