- Add an ordered_list dynamic field type (to be refined) - Add tabulator javascript library to project
49 lines
1.5 KiB
YAML
49 lines
1.5 KiB
YAML
# Processor definition for HTML files.
# NOTE(review): the nesting below was reconstructed from the key names
# (option fields under each option, options under `configuration`) —
# confirm against the schema of the loader that consumes this file.
version: "1.0.0"
name: "HTML Processor"
file_types: "html"
description: "A processor for HTML files"

# Configurable options, keyed by option identifier. Each option declares a
# display name, a type, a description, whether it is required and, for most
# options, a default value.
configuration:
  chunking_patterns:
    name: "Chunking Patterns"
    description: "A list of Patterns used to chunk files into logical pieces"
    type: "chunking_patterns"
    required: false
  chunking_heading_level:
    name: "Chunking Heading Level"
    type: "integer"
    description: "Maximum heading level to consider for chunking (1-6)"
    required: false
    default: 2
  html_tags:
    name: "HTML Tags"
    type: "string"
    description: "A comma-separated list of HTML tags"
    required: true
    default: "p, h1, h2, h3, h4, h5, h6, li, table, thead, tbody, tr, td"
  html_end_tags:
    name: "HTML End Tags"
    type: "string"
    description: "A comma-separated list of HTML end tags (where can the chunk end)"
    required: true
    default: "p, li, table"
  html_included_elements:
    name: "HTML Included Elements"
    type: "string"
    description: "A comma-separated list of elements to be included"
    required: true
    default: "article, main"
  html_excluded_elements:
    name: "HTML Excluded Elements"
    type: "string"
    description: "A comma-separated list of elements to be excluded"
    required: false
    default: "header, footer, nav, script"
  # No default is declared for this option.
  html_excluded_classes:
    name: "HTML Excluded Classes"
    type: "string"
    description: "A comma-separated list of classes to be excluded"
    required: false

# Descriptive information about this processor definition.
metadata:
  author: "System"
  # Kept quoted so the value stays a string rather than being parsed as a date.
  date_added: "2023-01-01"
  description: "A processor for HTML files"