- Replace old implementation of PROCESSOR_TYPES and CATALOG_TYPES with the new cached approach

- Add an ordered_list dynamic field type (to be refined)
- Add the Tabulator JavaScript library to the project
This commit is contained in:
Josako
2025-05-29 16:00:25 +02:00
parent 8a29eb0d8f
commit 25e169dbea
23 changed files with 687 additions and 199 deletions

View File

@@ -2,28 +2,10 @@
CATALOG_TYPES = {
"STANDARD_CATALOG": {
"name": "Standard Catalog",
"Description": "A Catalog with information in Evie's Library, to be considered as a whole",
"configuration": {},
"document_version_configurations": []
"description": "A Catalog with information in Evie's Library, to be considered as a whole",
},
"DOSSIER_CATALOG": {
"name": "Dossier Catalog",
"Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
"configuration": {
"tagging_fields": {
"name": "Tagging Fields",
"type": "tagging_fields",
"description": """Define the metadata fields that will be used for tagging documents.
Each field must have:
- type: one of 'string', 'integer', 'float', 'date', 'enum'
- required: boolean indicating if the field is mandatory
- description: field description
- allowed_values: list of values (for enum type only)
- min_value/max_value: range limits (for numeric types only)""",
"required": True,
"default": {},
}
},
"document_version_configurations": ["tagging_fields"]
"description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
},
}

View File

@@ -1,168 +1,28 @@
# Catalog Types
# Processor Types
PROCESSOR_TYPES = {
"HTML_PROCESSOR": {
"name": "HTML Processor",
"description": "A processor for HTML files",
"file_types": "html",
"Description": "A processor for HTML files",
"configuration": {
"chunking_patterns": {
"name": "Chunking Patterns",
"description": "A list of Patterns used to chunk files into logical pieces",
"type": "chunking_patterns",
"required": False
},
"chunking_heading_level": {
"name": "Chunking Heading Level",
"type": "integer",
"description": "Maximum heading level to consider for chunking (1-6)",
"required": False,
"default": 2
},
"html_tags": {
"name": "HTML Tags",
"type": "string",
"description": "A comma-separated list of HTML tags",
"required": True,
"default": "p, h1, h2, h3, h4, h5, h6, li, table, thead, tbody, tr, td"
},
"html_end_tags": {
"name": "HTML End Tags",
"type": "string",
"description": "A comma-separated list of HTML end tags (where can the chunk end)",
"required": True,
"default": "p, li, table"
},
"html_included_elements": {
"name": "HTML Included Elements",
"type": "string",
"description": "A comma-separated list of elements to be included",
"required": True,
"default": "article, main"
},
"html_excluded_elements": {
"name": "HTML Excluded Elements",
"type": "string",
"description": "A comma-separated list of elements to be excluded",
"required": False,
"default": "header, footer, nav, script"
},
"html_excluded_classes": {
"name": "HTML Excluded Classes",
"type": "string",
"description": "A comma-separated list of classes to be excluded",
"required": False,
},
},
},
"PDF_PROCESSOR": {
"name": "PDF Processor",
"description": "A Processor for PDF files",
"file_types": "pdf",
"Description": "A Processor for PDF files",
"configuration": {
"chunking_patterns": {
"name": "Chunking Patterns",
"description": "A list of Patterns used to chunk files into logical pieces",
"type": "chunking_patterns",
"required": False
},
"chunking_heading_level": {
"name": "Chunking Heading Level",
"type": "integer",
"description": "Maximum heading level to consider for chunking (1-6)",
"required": False,
"default": 2
},
},
},
"AUDIO_PROCESSOR": {
"name": "AUDIO Processor",
"description": "A Processor for audio files",
"file_types": "mp3, mp4, ogg",
"Description": "A Processor for audio files",
"configuration": {}
},
"MARKDOWN_PROCESSOR": {
"name": "Markdown Processor",
"description": "A Processor for markdown files",
"file_types": "md",
"Description": "A Processor for markdown files",
"configuration": {
"chunking_patterns": {
"name": "Chunking Patterns",
"description": "A list of Patterns used to chunk files into logical pieces",
"type": "chunking_patterns",
"required": False
},
"chunking_heading_level": {
"name": "Chunking Heading Level",
"type": "integer",
"description": "Maximum heading level to consider for chunking (1-6)",
"required": False,
"default": 2
},
}
},
"DOCX_PROCESSOR": {
"name": "DOCX Processor",
"description": "A processor for DOCX files",
"file_types": "docx",
"Description": "A processor for DOCX files",
"configuration": {
"chunking_patterns": {
"name": "Chunking Patterns",
"description": "A list of Patterns used to chunk files into logical pieces",
"type": "chunking_patterns",
"required": False
},
"chunking_heading_level": {
"name": "Chunking Heading Level",
"type": "integer",
"description": "Maximum heading level to consider for chunking (1-6)",
"required": False,
"default": 2
},
"extract_comments": {
"name": "Extract Comments",
"type": "boolean",
"description": "Whether to include document comments in the markdown",
"required": False,
"default": False
},
"extract_headers_footers": {
"name": "Extract Headers/Footers",
"type": "boolean",
"description": "Whether to include headers and footers in the markdown",
"required": False,
"default": False
},
"preserve_formatting": {
"name": "Preserve Formatting",
"type": "boolean",
"description": "Whether to preserve bold, italic, and other text formatting",
"required": False,
"default": True
},
"list_style": {
"name": "List Style",
"type": "enum",
"description": "How to format lists in markdown",
"required": False,
"default": "dash",
"allowed_values": ["dash", "asterisk", "plus"]
},
"image_handling": {
"name": "Image Handling",
"type": "enum",
"description": "How to handle embedded images",
"required": False,
"default": "skip",
"allowed_values": ["skip", "extract", "placeholder"]
},
"table_alignment": {
"name": "Table Alignment",
"type": "enum",
"description": "How to align table contents",
"required": False,
"default": "left",
"allowed_values": ["left", "center", "preserve"]
}
}
}
}

View File

@@ -16,5 +16,9 @@ SPECIALIST_TYPES = {
"name": "Traicie Role Definition Specialist",
"description": "Assistant Defining Competencies and KO Criteria",
"partner": "traicie"
},
"TRAICIE_SELECTION_SPECIALIST": {
"name": "Traicie Selection Specialist",
"description": "Recruitment Selection Assistant",
}
}