- Replace old implementation of PROCESSOR_TYPES and CATALOG_TYPES with the new cached approach
- Add an ordered_list dynamic field type (to be refined)
- Add the Tabulator JavaScript library to the project
This commit is contained in:
@@ -2,28 +2,10 @@
|
||||
CATALOG_TYPES = {
|
||||
"STANDARD_CATALOG": {
|
||||
"name": "Standard Catalog",
|
||||
"Description": "A Catalog with information in Evie's Library, to be considered as a whole",
|
||||
"configuration": {},
|
||||
"document_version_configurations": []
|
||||
"description": "A Catalog with information in Evie's Library, to be considered as a whole",
|
||||
},
|
||||
"DOSSIER_CATALOG": {
|
||||
"name": "Dossier Catalog",
|
||||
"Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
|
||||
"configuration": {
|
||||
"tagging_fields": {
|
||||
"name": "Tagging Fields",
|
||||
"type": "tagging_fields",
|
||||
"description": """Define the metadata fields that will be used for tagging documents.
|
||||
Each field must have:
|
||||
- type: one of 'string', 'integer', 'float', 'date', 'enum'
|
||||
- required: boolean indicating if the field is mandatory
|
||||
- description: field description
|
||||
- allowed_values: list of values (for enum type only)
|
||||
- min_value/max_value: range limits (for numeric types only)""",
|
||||
"required": True,
|
||||
"default": {},
|
||||
}
|
||||
},
|
||||
"document_version_configurations": ["tagging_fields"]
|
||||
"description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,168 +1,28 @@
|
||||
# Catalog Types
|
||||
# Processor Types
|
||||
PROCESSOR_TYPES = {
|
||||
"HTML_PROCESSOR": {
|
||||
"name": "HTML Processor",
|
||||
"description": "A processor for HTML files",
|
||||
"file_types": "html",
|
||||
"Description": "A processor for HTML files",
|
||||
"configuration": {
|
||||
"chunking_patterns": {
|
||||
"name": "Chunking Patterns",
|
||||
"description": "A list of Patterns used to chunk files into logical pieces",
|
||||
"type": "chunking_patterns",
|
||||
"required": False
|
||||
},
|
||||
"chunking_heading_level": {
|
||||
"name": "Chunking Heading Level",
|
||||
"type": "integer",
|
||||
"description": "Maximum heading level to consider for chunking (1-6)",
|
||||
"required": False,
|
||||
"default": 2
|
||||
},
|
||||
"html_tags": {
|
||||
"name": "HTML Tags",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of HTML tags",
|
||||
"required": True,
|
||||
"default": "p, h1, h2, h3, h4, h5, h6, li, table, thead, tbody, tr, td"
|
||||
},
|
||||
"html_end_tags": {
|
||||
"name": "HTML End Tags",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of HTML end tags (where can the chunk end)",
|
||||
"required": True,
|
||||
"default": "p, li, table"
|
||||
},
|
||||
"html_included_elements": {
|
||||
"name": "HTML Included Elements",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of elements to be included",
|
||||
"required": True,
|
||||
"default": "article, main"
|
||||
},
|
||||
"html_excluded_elements": {
|
||||
"name": "HTML Excluded Elements",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of elements to be excluded",
|
||||
"required": False,
|
||||
"default": "header, footer, nav, script"
|
||||
},
|
||||
"html_excluded_classes": {
|
||||
"name": "HTML Excluded Classes",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of classes to be excluded",
|
||||
"required": False,
|
||||
},
|
||||
},
|
||||
},
|
||||
"PDF_PROCESSOR": {
|
||||
"name": "PDF Processor",
|
||||
"description": "A Processor for PDF files",
|
||||
"file_types": "pdf",
|
||||
"Description": "A Processor for PDF files",
|
||||
"configuration": {
|
||||
"chunking_patterns": {
|
||||
"name": "Chunking Patterns",
|
||||
"description": "A list of Patterns used to chunk files into logical pieces",
|
||||
"type": "chunking_patterns",
|
||||
"required": False
|
||||
},
|
||||
"chunking_heading_level": {
|
||||
"name": "Chunking Heading Level",
|
||||
"type": "integer",
|
||||
"description": "Maximum heading level to consider for chunking (1-6)",
|
||||
"required": False,
|
||||
"default": 2
|
||||
},
|
||||
},
|
||||
},
|
||||
"AUDIO_PROCESSOR": {
|
||||
"name": "AUDIO Processor",
|
||||
"description": "A Processor for audio files",
|
||||
"file_types": "mp3, mp4, ogg",
|
||||
"Description": "A Processor for audio files",
|
||||
"configuration": {}
|
||||
},
|
||||
"MARKDOWN_PROCESSOR": {
|
||||
"name": "Markdown Processor",
|
||||
"description": "A Processor for markdown files",
|
||||
"file_types": "md",
|
||||
"Description": "A Processor for markdown files",
|
||||
"configuration": {
|
||||
"chunking_patterns": {
|
||||
"name": "Chunking Patterns",
|
||||
"description": "A list of Patterns used to chunk files into logical pieces",
|
||||
"type": "chunking_patterns",
|
||||
"required": False
|
||||
},
|
||||
"chunking_heading_level": {
|
||||
"name": "Chunking Heading Level",
|
||||
"type": "integer",
|
||||
"description": "Maximum heading level to consider for chunking (1-6)",
|
||||
"required": False,
|
||||
"default": 2
|
||||
},
|
||||
}
|
||||
},
|
||||
"DOCX_PROCESSOR": {
|
||||
"name": "DOCX Processor",
|
||||
"description": "A processor for DOCX files",
|
||||
"file_types": "docx",
|
||||
"Description": "A processor for DOCX files",
|
||||
"configuration": {
|
||||
"chunking_patterns": {
|
||||
"name": "Chunking Patterns",
|
||||
"description": "A list of Patterns used to chunk files into logical pieces",
|
||||
"type": "chunking_patterns",
|
||||
"required": False
|
||||
},
|
||||
"chunking_heading_level": {
|
||||
"name": "Chunking Heading Level",
|
||||
"type": "integer",
|
||||
"description": "Maximum heading level to consider for chunking (1-6)",
|
||||
"required": False,
|
||||
"default": 2
|
||||
},
|
||||
"extract_comments": {
|
||||
"name": "Extract Comments",
|
||||
"type": "boolean",
|
||||
"description": "Whether to include document comments in the markdown",
|
||||
"required": False,
|
||||
"default": False
|
||||
},
|
||||
"extract_headers_footers": {
|
||||
"name": "Extract Headers/Footers",
|
||||
"type": "boolean",
|
||||
"description": "Whether to include headers and footers in the markdown",
|
||||
"required": False,
|
||||
"default": False
|
||||
},
|
||||
"preserve_formatting": {
|
||||
"name": "Preserve Formatting",
|
||||
"type": "boolean",
|
||||
"description": "Whether to preserve bold, italic, and other text formatting",
|
||||
"required": False,
|
||||
"default": True
|
||||
},
|
||||
"list_style": {
|
||||
"name": "List Style",
|
||||
"type": "enum",
|
||||
"description": "How to format lists in markdown",
|
||||
"required": False,
|
||||
"default": "dash",
|
||||
"allowed_values": ["dash", "asterisk", "plus"]
|
||||
},
|
||||
"image_handling": {
|
||||
"name": "Image Handling",
|
||||
"type": "enum",
|
||||
"description": "How to handle embedded images",
|
||||
"required": False,
|
||||
"default": "skip",
|
||||
"allowed_values": ["skip", "extract", "placeholder"]
|
||||
},
|
||||
"table_alignment": {
|
||||
"name": "Table Alignment",
|
||||
"type": "enum",
|
||||
"description": "How to align table contents",
|
||||
"required": False,
|
||||
"default": "left",
|
||||
"allowed_values": ["left", "center", "preserve"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,5 +16,9 @@ SPECIALIST_TYPES = {
|
||||
"name": "Traicie Role Definition Specialist",
|
||||
"description": "Assistant Defining Competencies and KO Criteria",
|
||||
"partner": "traicie"
|
||||
},
|
||||
"TRAICIE_SELECTION_SPECIALIST": {
|
||||
"name": "Traicie Selection Specialist",
|
||||
"description": "Recruitment Selection Assistant",
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user