- Introduction of the Automatic HTML Processor

- Translation Service improvement
- Enable activation / deactivation of Processors
- Renew API-keys for Mistral (leading to workspaces)
- Align all Document views to use of a session catalog
- Allow for different processors for the same file type
This commit is contained in:
Josako
2025-06-26 14:38:40 +02:00
parent f5c9542a49
commit fda267b479
35 changed files with 551 additions and 356 deletions

View File

@@ -71,15 +71,6 @@ class ProcessorForm(FlaskForm):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Catalog for the Retriever
catalog = QuerySelectField(
'Catalog ID',
query_factory=lambda: Catalog.query.all(),
allow_blank=True,
get_label='name',
validators=[DataRequired()],
)
# Select Field for Catalog Type (Uses the CATALOG_TYPES defined in config)
type = SelectField('Processor Type', validators=[DataRequired()])
@@ -89,6 +80,7 @@ class ProcessorForm(FlaskForm):
default=2000)
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
default=3000)
active = BooleanField('Active', default=True)
tuning = BooleanField('Enable Embedding Tuning', default=False)
# Metadata fields
@@ -108,14 +100,6 @@ class EditProcessorForm(DynamicFormBase):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Catalog for the Retriever
catalog = QuerySelectField(
'Catalog ID',
query_factory=lambda: Catalog.query.all(),
allow_blank=True,
get_label='name',
validators=[Optional()],
)
type = StringField('Processor Type', validators=[DataRequired()], render_kw={'readonly': True})
sub_file_type = StringField('Sub File Type', validators=[Optional(), Length(max=50)])
@@ -124,6 +108,7 @@ class EditProcessorForm(DynamicFormBase):
default=2000)
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
default=3000)
active = BooleanField('Active', default=True)
tuning = BooleanField('Enable Embedding Tuning', default=False)
# Metadata fields
@@ -134,14 +119,7 @@ class EditProcessorForm(DynamicFormBase):
class RetrieverForm(FlaskForm):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Catalog for the Retriever
catalog = QuerySelectField(
'Catalog ID',
query_factory=lambda: Catalog.query.all(),
allow_blank=True,
get_label='name',
validators=[Optional()],
)
# Select Field for Retriever Type (Uses the RETRIEVER_TYPES defined in config)
type = SelectField('Retriever Type', validators=[DataRequired()])
tuning = BooleanField('Enable Tuning', default=False)
@@ -160,14 +138,7 @@ class RetrieverForm(FlaskForm):
class EditRetrieverForm(DynamicFormBase):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Catalog for the Retriever
catalog = QuerySelectField(
'Catalog ID',
query_factory=lambda: Catalog.query.all(),
allow_blank=True,
get_label='name',
validators=[Optional()],
)
# Select Field for Retriever Type (Uses the RETRIEVER_TYPES defined in config)
type = StringField('Processor Type', validators=[DataRequired()], render_kw={'readonly': True})
tuning = BooleanField('Enable Tuning', default=False)