- Replace old implementation of PROCESSOR_TYPES and CATALOG_TYPES with the new cached approach
- Add an ordered_list dynamic field type (to be refined)
- Add Tabulator JavaScript library to project
This commit is contained in:
21
config/catalogs/globals/DOSSIER_CATALOG/1.0.0.yaml
Normal file
21
config/catalogs/globals/DOSSIER_CATALOG/1.0.0.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
---
# Catalog type definition: Dossier Catalog, version 1.0.0.
version: "1.0.0"
name: "Dossier Catalog"
description: "A Catalog with information in Evie's Library in which several Dossiers can be stored"
configuration:
  tagging_fields:
    name: "Tagging Fields"
    type: "tagging_fields"
    # Literal block scalar (|-) keeps one bullet per line. A multi-line
    # double-quoted scalar would fold every line break into a single space
    # and collapse the list below into one long sentence.
    description: |-
      Define the metadata fields that will be used for tagging documents.
      Each field must have:
      - type: one of 'string', 'integer', 'float', 'date', 'enum'
      - required: boolean indicating if the field is mandatory
      - description: field description
      - allowed_values: list of values (for enum type only)
      - min_value/max_value: range limits (for numeric types only)
    required: true
    default: {}
# NOTE(review): presumably names entries of `configuration` that also apply
# per document version - confirm against the code that reads this key.
document_version_configurations: ["tagging_fields"]
metadata:
  author: "System"
  date_added: "2023-01-01"
  description: "A Catalog with information in Evie's Library in which several Dossiers can be stored"
|
||||
9
config/catalogs/globals/STANDARD_CATALOG/1.0.0.yaml
Normal file
9
config/catalogs/globals/STANDARD_CATALOG/1.0.0.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
---
# Catalog type definition: Standard Catalog, version 1.0.0.
# Declares an empty configuration and no document-version configurations.
version: '1.0.0'
name: 'Standard Catalog'
description: "A Catalog with information in Evie's Library, to be considered as a whole"
configuration: {}
document_version_configurations: []
metadata:
  author: 'System'
  date_added: '2023-01-01'
  description: "A Catalog with information in Evie's Library, to be considered as a whole"
|
||||
9
config/processors/globals/AUDIO_PROCESSOR/1.0.0.yaml
Normal file
9
config/processors/globals/AUDIO_PROCESSOR/1.0.0.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
---
# Processor type definition: AUDIO Processor, version 1.0.0.
version: '1.0.0'
name: 'AUDIO Processor'
# Comma-separated list kept as a single string.
file_types: 'mp3, mp4, ogg'
description: 'A Processor for audio files'
# No configurable fields for this processor.
configuration: {}
metadata:
  author: 'System'
  date_added: '2023-01-01'
  description: 'A Processor for audio files'
|
||||
59
config/processors/globals/DOCX_PROCESSOR/1.0.0.yaml
Normal file
59
config/processors/globals/DOCX_PROCESSOR/1.0.0.yaml
Normal file
@@ -0,0 +1,59 @@
|
||||
---
# Processor type definition: DOCX Processor, version 1.0.0.
version: '1.0.0'
name: 'DOCX Processor'
file_types: 'docx'
description: 'A processor for DOCX files'

# Each entry below describes one configurable field: display name, field type,
# description, whether it is required, and (where given) a default value and
# the allowed values.
configuration:
  chunking_patterns:
    name: 'Chunking Patterns'
    description: 'A list of Patterns used to chunk files into logical pieces'
    type: 'chunking_patterns'
    required: false

  chunking_heading_level:
    name: 'Chunking Heading Level'
    type: 'integer'
    description: 'Maximum heading level to consider for chunking (1-6)'
    required: false
    default: 2

  extract_comments:
    name: 'Extract Comments'
    type: 'boolean'
    description: 'Whether to include document comments in the markdown'
    required: false
    default: false

  extract_headers_footers:
    name: 'Extract Headers/Footers'
    type: 'boolean'
    description: 'Whether to include headers and footers in the markdown'
    required: false
    default: false

  preserve_formatting:
    name: 'Preserve Formatting'
    type: 'boolean'
    description: 'Whether to preserve bold, italic, and other text formatting'
    required: false
    default: true

  list_style:
    name: 'List Style'
    type: 'enum'
    description: 'How to format lists in markdown'
    required: false
    default: 'dash'
    allowed_values:
      - 'dash'
      - 'asterisk'
      - 'plus'

  image_handling:
    name: 'Image Handling'
    type: 'enum'
    description: 'How to handle embedded images'
    required: false
    default: 'skip'
    allowed_values:
      - 'skip'
      - 'extract'
      - 'placeholder'

  table_alignment:
    name: 'Table Alignment'
    type: 'enum'
    description: 'How to align table contents'
    required: false
    default: 'left'
    allowed_values:
      - 'left'
      - 'center'
      - 'preserve'

metadata:
  author: 'System'
  date_added: '2023-01-01'
  description: 'A processor for DOCX files'
|
||||
49
config/processors/globals/HTML_PROCESSOR/1.0.0.yaml
Normal file
49
config/processors/globals/HTML_PROCESSOR/1.0.0.yaml
Normal file
@@ -0,0 +1,49 @@
|
||||
---
# Processor type definition: HTML Processor, version 1.0.0.
version: '1.0.0'
name: 'HTML Processor'
file_types: 'html'
description: 'A processor for HTML files'

# Each entry below describes one configurable field. The html_* fields hold
# comma-separated lists stored as plain strings.
configuration:
  chunking_patterns:
    name: 'Chunking Patterns'
    description: 'A list of Patterns used to chunk files into logical pieces'
    type: 'chunking_patterns'
    required: false

  chunking_heading_level:
    name: 'Chunking Heading Level'
    type: 'integer'
    description: 'Maximum heading level to consider for chunking (1-6)'
    required: false
    default: 2

  html_tags:
    name: 'HTML Tags'
    type: 'string'
    description: 'A comma-separated list of HTML tags'
    required: true
    default: 'p, h1, h2, h3, h4, h5, h6, li, table, thead, tbody, tr, td'

  html_end_tags:
    name: 'HTML End Tags'
    type: 'string'
    description: 'A comma-separated list of HTML end tags (where can the chunk end)'
    required: true
    default: 'p, li, table'

  html_included_elements:
    name: 'HTML Included Elements'
    type: 'string'
    description: 'A comma-separated list of elements to be included'
    required: true
    default: 'article, main'

  html_excluded_elements:
    name: 'HTML Excluded Elements'
    type: 'string'
    description: 'A comma-separated list of elements to be excluded'
    required: false
    default: 'header, footer, nav, script'

  # No default is declared for this field.
  html_excluded_classes:
    name: 'HTML Excluded Classes'
    type: 'string'
    description: 'A comma-separated list of classes to be excluded'
    required: false

metadata:
  author: 'System'
  date_added: '2023-01-01'
  description: 'A processor for HTML files'
|
||||
20
config/processors/globals/MARKDOWN_PROCESSOR/1.0.0.yaml
Normal file
20
config/processors/globals/MARKDOWN_PROCESSOR/1.0.0.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
---
# Processor type definition: Markdown Processor, version 1.0.0.
version: '1.0.0'
name: 'Markdown Processor'
file_types: 'md'
description: 'A Processor for markdown files'

# Two optional chunking fields; only the heading level declares a default.
configuration:
  chunking_patterns:
    name: 'Chunking Patterns'
    description: 'A list of Patterns used to chunk files into logical pieces'
    type: 'chunking_patterns'
    required: false

  chunking_heading_level:
    name: 'Chunking Heading Level'
    type: 'integer'
    description: 'Maximum heading level to consider for chunking (1-6)'
    required: false
    default: 2

metadata:
  author: 'System'
  date_added: '2023-01-01'
  description: 'A Processor for markdown files'
|
||||
20
config/processors/globals/PDF_PROCESSOR/1.0.0.yaml
Normal file
20
config/processors/globals/PDF_PROCESSOR/1.0.0.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
---
# Processor type definition: PDF Processor, version 1.0.0.
version: '1.0.0'
name: 'PDF Processor'
file_types: 'pdf'
description: 'A Processor for PDF files'

# Two optional chunking fields; only the heading level declares a default.
configuration:
  chunking_patterns:
    name: 'Chunking Patterns'
    description: 'A list of Patterns used to chunk files into logical pieces'
    type: 'chunking_patterns'
    required: false

  chunking_heading_level:
    name: 'Chunking Heading Level'
    type: 'integer'
    description: 'Maximum heading level to consider for chunking (1-6)'
    required: false
    default: 2

metadata:
  author: 'System'
  date_added: '2023-01-01'
  description: 'A Processor for PDF files'
|
||||
@@ -2,28 +2,10 @@
|
||||
CATALOG_TYPES = {
|
||||
"STANDARD_CATALOG": {
|
||||
"name": "Standard Catalog",
|
||||
"Description": "A Catalog with information in Evie's Library, to be considered as a whole",
|
||||
"configuration": {},
|
||||
"document_version_configurations": []
|
||||
"description": "A Catalog with information in Evie's Library, to be considered as a whole",
|
||||
},
|
||||
"DOSSIER_CATALOG": {
|
||||
"name": "Dossier Catalog",
|
||||
"Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
|
||||
"configuration": {
|
||||
"tagging_fields": {
|
||||
"name": "Tagging Fields",
|
||||
"type": "tagging_fields",
|
||||
"description": """Define the metadata fields that will be used for tagging documents.
|
||||
Each field must have:
|
||||
- type: one of 'string', 'integer', 'float', 'date', 'enum'
|
||||
- required: boolean indicating if the field is mandatory
|
||||
- description: field description
|
||||
- allowed_values: list of values (for enum type only)
|
||||
- min_value/max_value: range limits (for numeric types only)""",
|
||||
"required": True,
|
||||
"default": {},
|
||||
}
|
||||
},
|
||||
"document_version_configurations": ["tagging_fields"]
|
||||
"description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,168 +1,28 @@
|
||||
# Catalog Types
|
||||
# Processor Types
|
||||
PROCESSOR_TYPES = {
|
||||
"HTML_PROCESSOR": {
|
||||
"name": "HTML Processor",
|
||||
"description": "A processor for HTML files",
|
||||
"file_types": "html",
|
||||
"Description": "A processor for HTML files",
|
||||
"configuration": {
|
||||
"chunking_patterns": {
|
||||
"name": "Chunking Patterns",
|
||||
"description": "A list of Patterns used to chunk files into logical pieces",
|
||||
"type": "chunking_patterns",
|
||||
"required": False
|
||||
},
|
||||
"chunking_heading_level": {
|
||||
"name": "Chunking Heading Level",
|
||||
"type": "integer",
|
||||
"description": "Maximum heading level to consider for chunking (1-6)",
|
||||
"required": False,
|
||||
"default": 2
|
||||
},
|
||||
"html_tags": {
|
||||
"name": "HTML Tags",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of HTML tags",
|
||||
"required": True,
|
||||
"default": "p, h1, h2, h3, h4, h5, h6, li, table, thead, tbody, tr, td"
|
||||
},
|
||||
"html_end_tags": {
|
||||
"name": "HTML End Tags",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of HTML end tags (where can the chunk end)",
|
||||
"required": True,
|
||||
"default": "p, li, table"
|
||||
},
|
||||
"html_included_elements": {
|
||||
"name": "HTML Included Elements",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of elements to be included",
|
||||
"required": True,
|
||||
"default": "article, main"
|
||||
},
|
||||
"html_excluded_elements": {
|
||||
"name": "HTML Excluded Elements",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of elements to be excluded",
|
||||
"required": False,
|
||||
"default": "header, footer, nav, script"
|
||||
},
|
||||
"html_excluded_classes": {
|
||||
"name": "HTML Excluded Classes",
|
||||
"type": "string",
|
||||
"description": "A comma-separated list of classes to be excluded",
|
||||
"required": False,
|
||||
},
|
||||
},
|
||||
},
|
||||
"PDF_PROCESSOR": {
|
||||
"name": "PDF Processor",
|
||||
"description": "A Processor for PDF files",
|
||||
"file_types": "pdf",
|
||||
"Description": "A Processor for PDF files",
|
||||
"configuration": {
|
||||
"chunking_patterns": {
|
||||
"name": "Chunking Patterns",
|
||||
"description": "A list of Patterns used to chunk files into logical pieces",
|
||||
"type": "chunking_patterns",
|
||||
"required": False
|
||||
},
|
||||
"chunking_heading_level": {
|
||||
"name": "Chunking Heading Level",
|
||||
"type": "integer",
|
||||
"description": "Maximum heading level to consider for chunking (1-6)",
|
||||
"required": False,
|
||||
"default": 2
|
||||
},
|
||||
},
|
||||
},
|
||||
"AUDIO_PROCESSOR": {
|
||||
"name": "AUDIO Processor",
|
||||
"description": "A Processor for audio files",
|
||||
"file_types": "mp3, mp4, ogg",
|
||||
"Description": "A Processor for audio files",
|
||||
"configuration": {}
|
||||
},
|
||||
"MARKDOWN_PROCESSOR": {
|
||||
"name": "Markdown Processor",
|
||||
"description": "A Processor for markdown files",
|
||||
"file_types": "md",
|
||||
"Description": "A Processor for markdown files",
|
||||
"configuration": {
|
||||
"chunking_patterns": {
|
||||
"name": "Chunking Patterns",
|
||||
"description": "A list of Patterns used to chunk files into logical pieces",
|
||||
"type": "chunking_patterns",
|
||||
"required": False
|
||||
},
|
||||
"chunking_heading_level": {
|
||||
"name": "Chunking Heading Level",
|
||||
"type": "integer",
|
||||
"description": "Maximum heading level to consider for chunking (1-6)",
|
||||
"required": False,
|
||||
"default": 2
|
||||
},
|
||||
}
|
||||
},
|
||||
"DOCX_PROCESSOR": {
|
||||
"name": "DOCX Processor",
|
||||
"description": "A processor for DOCX files",
|
||||
"file_types": "docx",
|
||||
"Description": "A processor for DOCX files",
|
||||
"configuration": {
|
||||
"chunking_patterns": {
|
||||
"name": "Chunking Patterns",
|
||||
"description": "A list of Patterns used to chunk files into logical pieces",
|
||||
"type": "chunking_patterns",
|
||||
"required": False
|
||||
},
|
||||
"chunking_heading_level": {
|
||||
"name": "Chunking Heading Level",
|
||||
"type": "integer",
|
||||
"description": "Maximum heading level to consider for chunking (1-6)",
|
||||
"required": False,
|
||||
"default": 2
|
||||
},
|
||||
"extract_comments": {
|
||||
"name": "Extract Comments",
|
||||
"type": "boolean",
|
||||
"description": "Whether to include document comments in the markdown",
|
||||
"required": False,
|
||||
"default": False
|
||||
},
|
||||
"extract_headers_footers": {
|
||||
"name": "Extract Headers/Footers",
|
||||
"type": "boolean",
|
||||
"description": "Whether to include headers and footers in the markdown",
|
||||
"required": False,
|
||||
"default": False
|
||||
},
|
||||
"preserve_formatting": {
|
||||
"name": "Preserve Formatting",
|
||||
"type": "boolean",
|
||||
"description": "Whether to preserve bold, italic, and other text formatting",
|
||||
"required": False,
|
||||
"default": True
|
||||
},
|
||||
"list_style": {
|
||||
"name": "List Style",
|
||||
"type": "enum",
|
||||
"description": "How to format lists in markdown",
|
||||
"required": False,
|
||||
"default": "dash",
|
||||
"allowed_values": ["dash", "asterisk", "plus"]
|
||||
},
|
||||
"image_handling": {
|
||||
"name": "Image Handling",
|
||||
"type": "enum",
|
||||
"description": "How to handle embedded images",
|
||||
"required": False,
|
||||
"default": "skip",
|
||||
"allowed_values": ["skip", "extract", "placeholder"]
|
||||
},
|
||||
"table_alignment": {
|
||||
"name": "Table Alignment",
|
||||
"type": "enum",
|
||||
"description": "How to align table contents",
|
||||
"required": False,
|
||||
"default": "left",
|
||||
"allowed_values": ["left", "center", "preserve"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,5 +16,9 @@ SPECIALIST_TYPES = {
|
||||
"name": "Traicie Role Definition Specialist",
|
||||
"description": "Assistant Defining Competencies and KO Criteria",
|
||||
"partner": "traicie"
|
||||
},
|
||||
"TRAICIE_SELECTION_SPECIALIST": {
|
||||
"name": "Traicie Selection Specialist",
|
||||
"description": "Recruitment Selection Assistant",
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user