# Commit note: Add an ordered_list dynamic field type (to be refined);
# add tabulator javascript library to project.
# File viewer metadata: 59 lines, 1.7 KiB, YAML.
# Processor definition for DOCX files.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened. Each entry under `configuration` groups the
# name/type/description/required/default keys that followed it; confirm that
# `metadata` is meant to be a top-level key rather than a configuration entry.
version: "1.0.0"
name: "DOCX Processor"
file_types: "docx"
description: "A processor for DOCX files"

# Configurable options. Every option declares a display name, a type, a
# description and whether it is required; most also declare a default.
configuration:
  # --- Chunking ---
  chunking_patterns:
    name: "Chunking Patterns"
    description: "A list of Patterns used to chunk files into logical pieces"
    type: "chunking_patterns"
    required: false
  chunking_heading_level:
    name: "Chunking Heading Level"
    type: "integer"
    description: "Maximum heading level to consider for chunking (1-6)"
    required: false
    default: 2

  # --- What to include in the markdown ---
  extract_comments:
    name: "Extract Comments"
    type: "boolean"
    description: "Whether to include document comments in the markdown"
    required: false
    default: false
  extract_headers_footers:
    name: "Extract Headers/Footers"
    type: "boolean"
    description: "Whether to include headers and footers in the markdown"
    required: false
    default: false
  preserve_formatting:
    name: "Preserve Formatting"
    type: "boolean"
    description: "Whether to preserve bold, italic, and other text formatting"
    required: false
    default: true

  # --- Enumerated options; `default` is one of `allowed_values` ---
  list_style:
    name: "List Style"
    type: "enum"
    description: "How to format lists in markdown"
    required: false
    default: "dash"
    allowed_values: ["dash", "asterisk", "plus"]
  image_handling:
    name: "Image Handling"
    type: "enum"
    description: "How to handle embedded images"
    required: false
    default: "skip"
    allowed_values: ["skip", "extract", "placeholder"]
  table_alignment:
    name: "Table Alignment"
    type: "enum"
    description: "How to align table contents"
    required: false
    default: "left"
    allowed_values: ["left", "center", "preserve"]

# Descriptive information about this definition. `date_added` stays quoted so
# it is loaded as a string rather than being implicitly typed as a date.
metadata:
  author: "System"
  date_added: "2023-01-01"
  description: "A processor for DOCX files"