- Adapt Catalogs & Retrievers to use specific types, removing tagging_fields - Adding CrewAI Implementation Guide
226 lines
11 KiB
Python
226 lines
11 KiB
Python
from flask import session, current_app
|
|
from flask_wtf import FlaskForm
|
|
from wtforms import (StringField, BooleanField, SubmitField, DateField, IntegerField, SelectField, TextAreaField,
|
|
URLField)
|
|
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError, NumberRange
|
|
from flask_wtf.file import FileField, FileRequired
|
|
import json
|
|
|
|
from wtforms.widgets.core import HiddenInput
|
|
from wtforms_sqlalchemy.fields import QuerySelectField
|
|
|
|
from common.extensions import cache_manager
|
|
from common.models.document import Catalog
|
|
from common.services.user import TenantServices
|
|
from common.utils.form_assistants import validate_json
|
|
|
|
from config.type_defs.processor_types import PROCESSOR_TYPES
|
|
from .dynamic_form_base import DynamicFormBase
|
|
|
|
|
|
def validate_catalog_name(form, field):
    """Reject a catalog name that is already used by a different catalog.

    Inline WTForms validator. It fetches the first ``Catalog`` whose ``name``
    equals ``field.data``. A match is tolerated only when the form exposes an
    ``id`` field whose value equals the id of the catalog that was found,
    i.e. the record being edited keeps its own name.

    Args:
        form: The form the field belongs to; may or may not have an ``id`` field.
        field: The name field being validated.

    Raises:
        ValidationError: If a catalog with this name exists and it is not the
            one identified by ``form.id``.
    """
    # Check whether a catalog with this name already exists.
    name_owner = Catalog.query.filter_by(name=field.data).first()
    if not name_owner:
        return

    # The catalog currently being edited is allowed to keep its own name.
    if hasattr(form, 'id') and form.id.data == name_owner.id:
        return

    raise ValidationError(f'A Catalog with name "{field.data}" already exists. Choose another name.')
|
|
|
|
|
|
class CatalogForm(FlaskForm):
    """Form for creating a new catalog.

    The choices of the ``type`` select field are not static: they are filled
    in by ``__init__`` from the catalog types that ``TenantServices`` reports
    as available for the tenant stored in the session.
    """
    # Name must be present, at most 50 characters, and pass validate_catalog_name.
    name = StringField('Name', validators=[DataRequired(), Length(max=50), validate_catalog_name])
    description = TextAreaField('Description', validators=[Optional()])

    # Select field for the catalog type; its choices are populated in __init__.
    type = SelectField('Catalog Type', validators=[DataRequired()])

    # Selection fields for processing & creating embeddings
    min_chunk_size = IntegerField('Minimum Chunk Size (1500)', validators=[NumberRange(min=0), Optional()],
                                  default=1500)
    max_chunk_size = IntegerField('Maximum Chunk Size (2500)', validators=[NumberRange(min=0), Optional()],
                                  default=2500)

    # Metadata fields (optional; checked by validate_json when filled in)
    user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
    system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])

    def __init__(self, *args, **kwargs):
        """Build the form and populate the ``type`` choices for the session's tenant.

        All positional and keyword arguments are forwarded to ``FlaskForm``.
        """
        super().__init__(*args, **kwargs)
        # Dynamically populate the 'type' field with the catalog types
        # available to the current tenant, as (key, display name) pairs.
        tenant_id = session.get('tenant').get('id')
        choices = TenantServices.get_available_types_for_tenant(tenant_id, "catalogs")
        self.type.choices = [(key, value['name']) for key, value in choices.items()]
|
|
|
|
|
|
class EditCatalogForm(DynamicFormBase):
    """Form for editing an existing catalog.

    Carries the catalog ``id`` in a hidden input so that
    ``validate_catalog_name`` can tell the edited catalog apart from other
    catalogs with the same name. ``type`` and ``type_version`` are rendered
    read-only.
    """
    id = IntegerField('ID', widget=HiddenInput())
    name = StringField(
        'Name',
        validators=[DataRequired(), Length(max=50), validate_catalog_name],
    )
    description = TextAreaField('Description', validators=[Optional()])

    # Catalog type and its version are shown but rendered read-only.
    type = StringField(
        'Catalog Type',
        validators=[DataRequired()],
        render_kw={'readonly': True},
    )
    type_version = StringField(
        'Catalog Type Version',
        validators=[DataRequired()],
        render_kw={'readonly': True},
    )

    # Selection fields for processing & creating embeddings
    min_chunk_size = IntegerField(
        'Minimum Chunk Size (2000)',
        validators=[NumberRange(min=0), Optional()],
        default=2000,
    )
    max_chunk_size = IntegerField(
        'Maximum Chunk Size (3000)',
        validators=[NumberRange(min=0), Optional()],
        default=3000,
    )

    # Metadata fields (optional; checked by validate_json when filled in)
    user_metadata = TextAreaField(
        'User Metadata',
        validators=[Optional(), validate_json],
    )
    system_metadata = TextAreaField(
        'System Metadata',
        validators=[Optional(), validate_json],
    )
|
|
|
|
|
|
class ProcessorForm(FlaskForm):
    """Form for creating a new processor.

    The ``type`` select field is populated in ``__init__`` from
    ``PROCESSOR_TYPES``, ordered by display name.
    """
    name = StringField(
        'Name',
        validators=[DataRequired(), Length(max=50)],
    )
    description = TextAreaField('Description', validators=[Optional()])

    # Select field for the processor type; choices come from PROCESSOR_TYPES (see __init__).
    type = SelectField('Processor Type', validators=[DataRequired()])

    sub_file_type = StringField(
        'Sub File Type',
        validators=[Optional(), Length(max=50)],
    )

    min_chunk_size = IntegerField(
        'Minimum Chunk Size (2000)',
        validators=[NumberRange(min=0), Optional()],
        default=2000,
    )
    max_chunk_size = IntegerField(
        'Maximum Chunk Size (3000)',
        validators=[NumberRange(min=0), Optional()],
        default=3000,
    )
    active = BooleanField('Active', default=True)
    tuning = BooleanField('Enable Embedding Tuning', default=False)

    # Metadata fields (optional; checked by validate_json when filled in)
    user_metadata = TextAreaField(
        'User Metadata',
        validators=[Optional(), validate_json],
    )
    system_metadata = TextAreaField(
        'System Metadata',
        validators=[Optional(), validate_json],
    )

    def __init__(self, *args, **kwargs):
        """Build the form and fill the ``type`` choices, sorted by display name."""
        super().__init__(*args, **kwargs)
        # One (key, display name) pair per configured processor type.
        type_options = [
            (type_key, type_def['name'])
            for type_key, type_def in PROCESSOR_TYPES.items()
        ]
        # Order alphabetically on the display name shown to the user.
        type_options.sort(key=lambda option: option[1])
        self.type.choices = type_options
|
|
|
|
|
|
class EditProcessorForm(DynamicFormBase):
    """Form for editing an existing processor.

    Mirrors the fields of the creation form, except that ``type`` is a plain
    string field rendered read-only.
    """
    name = StringField(
        'Name',
        validators=[DataRequired(), Length(max=50)],
    )
    description = TextAreaField('Description', validators=[Optional()])

    # Processor type is shown but rendered read-only.
    type = StringField(
        'Processor Type',
        validators=[DataRequired()],
        render_kw={'readonly': True},
    )

    sub_file_type = StringField(
        'Sub File Type',
        validators=[Optional(), Length(max=50)],
    )

    min_chunk_size = IntegerField(
        'Minimum Chunk Size (2000)',
        validators=[NumberRange(min=0), Optional()],
        default=2000,
    )
    max_chunk_size = IntegerField(
        'Maximum Chunk Size (3000)',
        validators=[NumberRange(min=0), Optional()],
        default=3000,
    )
    active = BooleanField('Active', default=True)
    tuning = BooleanField('Enable Embedding Tuning', default=False)

    # Metadata fields (optional; checked by validate_json when filled in)
    user_metadata = TextAreaField(
        'User Metadata',
        validators=[Optional(), validate_json],
    )
    system_metadata = TextAreaField(
        'System Metadata',
        validators=[Optional(), validate_json],
    )
|
|
|
|
|
|
class RetrieverForm(FlaskForm):
    """Form for creating a new retriever.

    The ``type`` choices are computed in ``__init__``: the retriever types
    available to the session's tenant, filtered down to those compatible
    with the type of the catalog stored in the session.
    """
    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
    description = TextAreaField('Description', validators=[Optional()])

    # Select field for the retriever type; its choices are populated in __init__.
    type = SelectField('Retriever Type', validators=[DataRequired()])
    tuning = BooleanField('Enable Tuning', default=False)

    # Metadata fields (optional; checked by validate_json when filled in)
    user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
    system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])

    def __init__(self, *args, **kwargs):
        """Build the form and populate the ``type`` choices.

        A retriever type that declares a non-empty ``valid_catalog_types`` is
        offered only when the session catalog's type is listed there; a type
        without that restriction is always offered. When the session holds no
        catalog, only unrestricted retriever types are offered.
        """
        super().__init__(*args, **kwargs)
        tenant_id = session.get('tenant').get('id')
        choices = TenantServices.get_available_types_for_tenant(tenant_id, "retrievers")
        current_app.logger.debug(f"Potential choices: {choices}")

        # The catalog type does not change while iterating, so read it once.
        # Tolerate a missing 'catalog' session entry: catalog_type is then
        # None and matches no restricted retriever type.
        catalog_type = (session.get('catalog') or {}).get('type')

        # Dynamically populate the 'type' field using the constructor
        type_choices = []
        for key, value in choices.items():
            valid_catalog_types = value.get('valid_catalog_types', None)
            if valid_catalog_types:
                current_app.logger.debug(f"Check {catalog_type} in {valid_catalog_types}")
                if catalog_type not in valid_catalog_types:
                    continue
            # Either unrestricted (valid for all catalog types) or a match.
            type_choices.append((key, value['name']))
        self.type.choices = type_choices
|
|
|
|
|
|
class EditRetrieverForm(DynamicFormBase):
    """Form for editing an existing retriever.

    ``type`` and ``type_version`` are rendered read-only.
    """
    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
    description = TextAreaField('Description', validators=[Optional()])

    # Retriever type and its version are shown but rendered read-only.
    # The label reads 'Retriever Type', matching the field it describes.
    type = StringField('Retriever Type', validators=[DataRequired()], render_kw={'readonly': True})
    type_version = StringField('Retriever Type Version', validators=[DataRequired()], render_kw={'readonly': True})
    tuning = BooleanField('Enable Tuning', default=False)

    # Metadata fields (optional; checked by validate_json when filled in)
    user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
    system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
|
|
|
|
|
|
class AddDocumentForm(DynamicFormBase):
    """Form for adding a document by uploading a file.

    ``__init__`` fills the language choices from the application's
    ``SUPPORTED_LANGUAGES`` setting, falls back to the session tenant's
    default language when none is set, and shows the session's catalog name
    in the read-only ``catalog`` field.
    """
    file = FileField('File', validators=[FileRequired()])
    catalog = StringField('Catalog', render_kw={'readonly': True})
    sub_file_type = StringField(
        'Sub File Type',
        validators=[Optional(), Length(max=50)],
    )
    name = StringField('Name', validators=[Length(max=100)])
    # Choices start empty and are filled per request in __init__.
    language = SelectField('Language', choices=[], validators=[Optional()])
    user_context = TextAreaField('User Context', validators=[Optional()])
    # NOTE(review): the id value contains a space and looks like a CSS class
    # list; confirm whether a `class` in render_kw was intended.
    valid_from = DateField(
        'Valid from',
        id='form-control datepicker',
        validators=[Optional()],
    )
    user_metadata = TextAreaField(
        'User Metadata',
        validators=[Optional(), validate_json],
    )

    def __init__(self, *args, **kwargs):
        """Build the form, then set language choices/default and the catalog name."""
        super().__init__(*args, **kwargs)

        # Offer every supported language, using the same value as key and label.
        supported_languages = current_app.config['SUPPORTED_LANGUAGES']
        self.language.choices = [(lang, lang) for lang in supported_languages]

        # Fall back to the tenant's default language when none is set.
        if not self.language.data:
            self.language.data = session.get('tenant').get('default_language')

        self.catalog.data = session.get('catalog_name', '')
|
|
|
|
|
|
class AddURLForm(DynamicFormBase):
    """Form for adding a document by URL.

    ``__init__`` fills the language choices from the application's
    ``SUPPORTED_LANGUAGES`` setting, falls back to the session tenant's
    default language when none is set, and shows the session's catalog name
    in the read-only ``catalog`` field.
    """
    url = URLField('URL', validators=[DataRequired(), URL()])
    catalog = StringField('Catalog', render_kw={'readonly': True})
    sub_file_type = StringField(
        'Sub File Type',
        validators=[Optional(), Length(max=50)],
    )
    name = StringField('Name', validators=[Length(max=100)])
    # Choices start empty and are filled per request in __init__.
    language = SelectField('Language', choices=[], validators=[Optional()])
    user_context = TextAreaField('User Context', validators=[Optional()])
    # NOTE(review): the id value contains a space and looks like a CSS class
    # list; confirm whether a `class` in render_kw was intended.
    valid_from = DateField(
        'Valid from',
        id='form-control datepicker',
        validators=[Optional()],
    )
    user_metadata = TextAreaField(
        'User Metadata',
        validators=[Optional(), validate_json],
    )

    def __init__(self, *args, **kwargs):
        """Build the form, then set language choices/default and the catalog name."""
        super().__init__(*args, **kwargs)

        # Offer every supported language, using the same value as key and label.
        supported_languages = current_app.config['SUPPORTED_LANGUAGES']
        self.language.choices = [(lang, lang) for lang in supported_languages]

        # Fall back to the tenant's default language when none is set.
        if not self.language.data:
            self.language.data = session.get('tenant').get('default_language')

        self.catalog.data = session.get('catalog_name', '')
|
|
|
|
|
|
class EditDocumentForm(FlaskForm):
    """Form for editing a document's name and validity dates."""
    name = StringField('Name', validators=[Length(max=100)])

    # NOTE(review): both date fields are given the same explicit id, and the
    # value contains a space, so it looks like a CSS class list. Confirm
    # whether a `class` in render_kw was intended.
    valid_from = DateField(
        'Valid from',
        id='form-control datepicker',
        validators=[Optional()],
    )
    valid_to = DateField(
        'Valid to',
        id='form-control datepicker',
        validators=[Optional()],
    )

    submit = SubmitField('Submit')
|
|
|
|
|
|
class EditDocumentVersionForm(DynamicFormBase):
    """Form for editing a single document version."""
    sub_file_type = StringField(
        'Sub File Type',
        validators=[Optional(), Length(max=50)],
    )
    language = StringField('Language')

    # Free-text context fields, both optional.
    user_context = TextAreaField('User Context', validators=[Optional()])
    system_context = TextAreaField('System Context', validators=[Optional()])

    # Metadata fields (optional; checked by validate_json when filled in)
    user_metadata = TextAreaField(
        'User Metadata',
        validators=[Optional(), validate_json],
    )
    system_metadata = TextAreaField(
        'System Metadata',
        validators=[Optional(), validate_json],
    )

    submit = SubmitField('Submit')
|