From adee283d7a8b4d7af96c133bfc050ad88abaa015 Mon Sep 17 00:00:00 2001 From: Josako Date: Mon, 13 May 2024 14:58:21 +0200 Subject: [PATCH] Simplify model selection for both embeddings and LLM. Editing capabilities for new tenant columns... --- common/models/user.py | 15 +- common/utils/security.py | 4 +- config/config.py | 4 + eveai_app/templates/user/edit_tenant.html | 2 +- eveai_app/templates/user/select_tenant.html | 3 +- eveai_app/views/basic_forms.py | 9 +- eveai_app/views/basic_views.py | 2 - eveai_app/views/document_forms.py | 9 +- eveai_app/views/user_forms.py | 18 +-- eveai_app/views/user_views.py | 151 +++++++++----------- 10 files changed, 98 insertions(+), 119 deletions(-) diff --git a/common/models/user.py b/common/models/user.py index 70a3d2d..e88e31a 100644 --- a/common/models/user.py +++ b/common/models/user.py @@ -24,13 +24,14 @@ class Tenant(db.Model): allowed_languages = db.Column(ARRAY(sa.String(2)), nullable=True) # LLM specific choices - default_embedding_model = db.Column(db.String(50), nullable=True) - allowed_embedding_models = db.Column(ARRAY(sa.String(50)), nullable=True) - default_llm_model = db.Column(db.String(50), nullable=True) - allowed_llm_models = db.Column(ARRAY(sa.String(50)), nullable=True) + embedding_model = db.Column(db.String(50), nullable=True) + llm_model = db.Column(db.String(50), nullable=True) # Embedding variables html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li']) + html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li']) + html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True) + html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True) # Licensing Information license_start_date = db.Column(db.Date, nullable=True) @@ -50,10 +51,8 @@ class Tenant(db.Model): 'website': self.website, 'default_language': self.default_language, 'allowed_languages': self.allowed_languages, - 'default_embedding_model': 
self.default_embedding_model, - 'allowed_embedding_models': self.allowed_embedding_models, - 'default_llm_model': self.default_llm_model, - 'allowed_llm_models': self.allowed_llm_models, + 'embedding_model': self.embedding_model, + 'llm_model': self.llm_model, 'license_start_date': self.license_start_date, 'license_end_date': self.license_end_date, 'allowed_monthly_interactions': self.allowed_monthly_interactions diff --git a/common/utils/security.py b/common/utils/security.py index 536f2bc..6624ab9 100644 --- a/common/utils/security.py +++ b/common/utils/security.py @@ -7,8 +7,8 @@ def set_tenant_session_data(sender, user, **kwargs): tenant = Tenant.query.filter_by(id=user.tenant_id).first() session['tenant'] = tenant.to_dict() session['default_language'] = tenant.default_language - session['default_embedding_model'] = tenant.default_embedding_model - session['default_llm_model'] = tenant.default_llm_model + session['default_embedding_model'] = tenant.embedding_model + session['default_llm_model'] = tenant.llm_model def clear_tenant_session_data(sender, user, **kwargs): diff --git a/config/config.py b/config/config.py index bfceda5..d731a3a 100644 --- a/config/config.py +++ b/config/config.py @@ -51,6 +51,10 @@ class Config(object): CELERY_TIMEZONE = 'UTC' CELERY_ENABLE_UTC = True + # Chunk Definition + MIN_CHUNK_SIZE = 2000 + MAX_CHUNK_SIZE = 3000 + # LLM TEMPLATES GPT4_SUMMARY_TEMPLATE = """Write a concise summary of the text in the same language as the provided text. Text is delimited between triple backquotes. diff --git a/eveai_app/templates/user/edit_tenant.html b/eveai_app/templates/user/edit_tenant.html index ec22a31..40e31c5 100644 --- a/eveai_app/templates/user/edit_tenant.html +++ b/eveai_app/templates/user/edit_tenant.html @@ -9,7 +9,7 @@ {% block content %}
{{ form.hidden_tag() }} - {% set disabled_fields = ['name'] %} + {% set disabled_fields = ['name', 'embedding_model', 'llm_model'] %} {% set exclude_fields = [] %} {% for field in form %} {{ render_field(field, disabled_fields, exclude_fields) }} diff --git a/eveai_app/templates/user/select_tenant.html b/eveai_app/templates/user/select_tenant.html index ddb5097..f8ff2dc 100644 --- a/eveai_app/templates/user/select_tenant.html +++ b/eveai_app/templates/user/select_tenant.html @@ -26,7 +26,8 @@ {% endfor %} - + +
diff --git a/eveai_app/views/basic_forms.py b/eveai_app/views/basic_forms.py index 3eb3599..e5528d3 100644 --- a/eveai_app/views/basic_forms.py +++ b/eveai_app/views/basic_forms.py @@ -12,8 +12,6 @@ class SessionDefaultsForm(FlaskForm): # Tenant Defaults tenant_name = StringField('Tenant Name', validators=[DataRequired()]) default_language = SelectField('Default Language', choices=[], validators=[DataRequired()]) - default_embedding_model = SelectField('Default Embedding Model', choices=[], validators=[DataRequired()]) - default_llm_model = SelectField('Default LLM Model', choices=[], validators=[DataRequired()]) def __init__(self): super().__init__() @@ -22,9 +20,4 @@ class SessionDefaultsForm(FlaskForm): self.tenant_name.data = session.get('tenant').get('name') self.default_language.choices = [(lang, lang.lower()) for lang in session.get('tenant').get('allowed_languages')] - self.default_language.data = session.get('default_language') - self.default_embedding_model.choices = [(model, model) for model in - session.get('tenant').get('allowed_embedding_models')] - self.default_embedding_model.data = session.get('default_embedding_model') - self.default_llm_model.choices = [(model, model) for model in session.get('tenant').get('allowed_llm_models')] - self.default_llm_model.data = session.get('default_llm_model') + self.default_language.data = session.get('default_language') \ No newline at end of file diff --git a/eveai_app/views/basic_views.py b/eveai_app/views/basic_views.py index f3eb4a5..51a112c 100644 --- a/eveai_app/views/basic_views.py +++ b/eveai_app/views/basic_views.py @@ -18,8 +18,6 @@ def session_defaults(): if form.validate_on_submit(): session['default_language'] = form.default_language.data - session['default_embedding_model'] = form.default_embedding_model.data - session['default_llm_model'] = form.default_llm_model.data return render_template('basic/session_defaults.html', form=form) diff --git a/eveai_app/views/document_forms.py 
b/eveai_app/views/document_forms.py index 6346dde..39fcf9d 100644 --- a/eveai_app/views/document_forms.py +++ b/eveai_app/views/document_forms.py @@ -22,9 +22,8 @@ class AddDocumentForm(FlaskForm): self.language.choices = [(language, language) for language in session.get('tenant').get('allowed_languages')] self.language.data = session.get('default_language') - self.doc_embedding_model.choices = [(model, model) for model in - session.get('tenant').get('allowed_embedding_models')] - self.doc_embedding_model.data = session.get('default_embedding_model') + # The embedding model is fixed per tenant: read it from the session's tenant dict, which is refreshed on both login and tenant selection + self.doc_embedding_model.data = session.get('tenant').get('embedding_model') class AddURLForm(FlaskForm): @@ -33,7 +32,7 @@ class AddURLForm(FlaskForm): language = SelectField('Language', choices=[], validators=[Optional()]) user_context = TextAreaField('User Context', validators=[Optional()]) valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()]) - doc_embedding_model = SelectField('Default Embedding Model', choices=[], validators=[DataRequired()]) + doc_embedding_model = SelectField('Embedding Model', choices=[], validators=[DataRequired()]) submit = SubmitField('Submit') @@ -42,6 +41,4 @@ class AddURLForm(FlaskForm): self.language.choices = [(language, language) for language in session.get('tenant').get('allowed_languages')] self.language.data = session.get('default_language') - self.doc_embedding_model.choices = [(model, model) for model in - session.get('tenant').get('allowed_embedding_models')] self.doc_embedding_model.data = session.get('default_embedding_model') diff --git a/eveai_app/views/user_forms.py b/eveai_app/views/user_forms.py index 4a7918c..6541b5f 100644 --- a/eveai_app/views/user_forms.py +++ b/eveai_app/views/user_forms.py @@ -13,17 +13,19 @@ class TenantForm(FlaskForm): default_language = SelectField('Default Language', choices=[], validators=[DataRequired()]) allowed_languages = SelectMultipleField('Allowed Languages', choices=[], validators=[DataRequired()]) # LLM
fields - default_embedding_model = SelectField('Default Embedding Model', choices=[], validators=[DataRequired()]) - allowed_embedding_models = SelectMultipleField('Allowed Embedding Models', choices=[], validators=[DataRequired()]) - default_llm_model = SelectField('Default Large Language Model', choices=[], validators=[DataRequired()]) - allowed_llm_models = SelectMultipleField('Allowed Large Language Models', choices=[], validators=[DataRequired()]) + embedding_model = SelectField('Embedding Model', choices=[], validators=[DataRequired()]) + llm_model = SelectField('Large Language Model', choices=[], validators=[DataRequired()]) # license fields license_start_date = DateField('License Start Date', id='form-control datepicker', validators=[Optional()]) license_end_date = DateField('License End Date', id='form-control datepicker', validators=[Optional()]) allowed_monthly_interactions = IntegerField('Allowed Monthly Interactions', validators=[NumberRange(min=0)]) # Embedding variables - html_tags = StringField('HTML Tags', validators=[DataRequired(), Length(max=255)], + html_tags = StringField('HTML Tags', validators=[DataRequired()], default='p, h1, h2, h3, h4, h5, h6, li') + html_end_tags = StringField('HTML End Tags', validators=[DataRequired()], + default='p, li') + html_included_elements = StringField('HTML Included Elements', validators=[Optional()]) + html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()]) submit = SubmitField('Submit') @@ -33,10 +35,8 @@ class TenantForm(FlaskForm): self.default_language.choices = [(lang, lang.lower()) for lang in current_app.config['SUPPORTED_LANGUAGES']] self.allowed_languages.choices = [(lang, lang.lower()) for lang in current_app.config['SUPPORTED_LANGUAGES']] # initialise LLM fields - self.default_embedding_model.choices = [(model, model) for model in current_app.config['SUPPORTED_EMBEDDINGS']] - self.allowed_embedding_models.choices = [(model, model) for model in 
current_app.config['SUPPORTED_EMBEDDINGS']] - self.default_llm_model.choices = [(model, model) for model in current_app.config['SUPPORTED_LLMS']] - self.allowed_llm_models.choices = [(model, model) for model in current_app.config['SUPPORTED_LLMS']] + self.embedding_model.choices = [(model, model) for model in current_app.config['SUPPORTED_EMBEDDINGS']] + self.llm_model.choices = [(model, model) for model in current_app.config['SUPPORTED_LLMS']] class BaseUserForm(FlaskForm): diff --git a/eveai_app/views/user_views.py b/eveai_app/views/user_views.py index 715bce8..2da8f9a 100644 --- a/eveai_app/views/user_views.py +++ b/eveai_app/views/user_views.py @@ -3,6 +3,7 @@ import uuid from datetime import datetime as dt, timezone as tz from flask import request, redirect, url_for, flash, render_template, Blueprint, session, current_app from flask_security import hash_password, roles_required, roles_accepted +from sqlalchemy.exc import SQLAlchemyError from common.models.user import User, Tenant, Role from common.extensions import db @@ -15,82 +16,46 @@ user_bp = Blueprint('user_bp', __name__, url_prefix='/user') @user_bp.route('/tenant', methods=['GET', 'POST']) @roles_required('Super User') def tenant(): - if request.method == 'POST': - # Handle the required attributes - name = request.form.get('name') - website = request.form.get('website') - error = None - - if not name: - error = 'Tenant name is required.' - elif not website: - error = 'Tenant website is required.' 
- - # Create new tenant if there is no error - if error is None: - new_tenant = Tenant(name=name, website=website) - - # Handle optional attributes - lic_start = request.form.get('license_start_date') - lic_end = request.form.get('license_end_date') - monthly = request.form.get('allowed_monthly_interactions') - - # language fields - default_language = request.form.get('default_language') - allowed_languages = request.form.getlist('allowed_languages') - - if default_language != '': - new_tenant.default_language = default_language - if allowed_languages != '': - new_tenant.allowed_languages = allowed_languages - - # LLM fields - default_embedding = request.form.get('default_embedding') - allowed_embeddings = request.form.getlist('allowed_embeddings') - - if default_embedding != '': - new_tenant.default_embedding = default_embedding - if allowed_embeddings != '': - new_tenant.allowed_embeddings = allowed_embeddings - - default_llm_model = request.form.get('default_llm_model') - allowed_llm_models = request.form.getlist('allowed_llm_models') - - if default_llm_model != '': - new_tenant.default_llm_model = default_llm_model - if allowed_llm_models != '': - new_tenant.allowed_llm_models = allowed_llm_models - - # license data - if lic_start != '': - new_tenant.license_start_date = dt.strptime(lic_start, '%Y-%m-%d') - if lic_end != '': - new_tenant.license_end_date = dt.strptime(lic_end, '%Y-%m-%d') - if monthly != '': - new_tenant.allowed_monthly_interactions = int(monthly) - - # Handle Timestamps - timestamp = dt.now(tz.utc) - new_tenant.created_at = timestamp - new_tenant.updated_at = timestamp - - # Add the new tenant to the database and commit the changes - try: - db.session.add(new_tenant) - db.session.commit() - except Exception as e: - error = e.args - - # Create schema for new tenant - if error is None: - current_app.logger.info(f"Successfully created tenant {new_tenant.id} in Database") - current_app.logger.info(f"Creating schema for tenant {new_tenant.id}") - 
Database(new_tenant.id).create_tenant_schema() - else: - current_app.logger.error(f"Error creating tenant: {error}") - flash(error) if error else flash('Tenant added successfully.') - form = TenantForm() + if form.validate_on_submit(): + # Handle the required attributes + new_tenant = Tenant(name=form.name.data, + website=form.website.data, + default_language=form.default_language.data, + allowed_languages=form.allowed_languages.data, + embedding_model=form.embedding_model.data, + llm_model=form.llm_model.data, + license_start_date=form.license_start_date.data, + license_end_date=form.license_end_date.data, + allowed_monthly_interactions=form.allowed_monthly_interactions.data) + + # Handle Embedding Variables: split the comma-separated form text into trimmed, non-empty entries + new_tenant.html_tags = [tag.strip() for tag in (form.html_tags.data or '').split(',') if tag.strip()] + new_tenant.html_end_tags = [tag.strip() for tag in (form.html_end_tags.data or '').split(',') if tag.strip()] + new_tenant.html_included_elements = [elem.strip() for elem in (form.html_included_elements.data or '').split(',') if elem.strip()] + new_tenant.html_excluded_elements = [elem.strip() for elem in (form.html_excluded_elements.data or '').split(',') if elem.strip()] + + # Handle Timestamps + timestamp = dt.now(tz.utc) + new_tenant.created_at = timestamp + new_tenant.updated_at = timestamp + + # Add the new tenant to the database and commit the changes + try: + db.session.add(new_tenant) + db.session.commit() + except SQLAlchemyError as e: + current_app.logger.error(f'Failed to add tenant to database. Error: {str(e)}') + flash(f'Failed to add tenant to database.
Error: {str(e)}') + return render_template('user/tenant.html', form=form) + + # Create schema for new tenant + current_app.logger.info(f"Successfully created tenant {new_tenant.id} in Database") + flash(f"Successfully created tenant {new_tenant.id} in Database") + current_app.logger.info(f"Creating schema for tenant {new_tenant.id}") + Database(new_tenant.id).create_tenant_schema() + return redirect(url_for('basic_bp.index')) + return render_template('user/tenant.html', form=form) @@ -100,11 +65,30 @@ def edit_tenant(tenant_id): tenant = Tenant.query.get_or_404(tenant_id) # This will return a 404 if no tenant is found form = TenantForm(obj=tenant) + if request.method == 'GET': + # Populate the form with tenant data + form.populate_obj(tenant) + if tenant.html_tags: + form.html_tags.data = ', '.join(tenant.html_tags) + if tenant.html_end_tags: + form.html_end_tags.data = ', '.join(tenant.html_end_tags) + if tenant.html_included_elements: + form.html_included_elements.data = ', '.join(tenant.html_included_elements) + if tenant.html_excluded_elements: + form.html_excluded_elements.data = ', '.join(tenant.html_excluded_elements) + if request.method == 'POST' and form.validate_on_submit(): # Populate the tenant with form data form.populate_obj(tenant) + # Then handle the special fields manually + tenant.html_tags = [tag.strip() for tag in form.html_tags.data.split(',') if tag.strip()] + tenant.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',') if tag.strip()] + tenant.html_included_elements = [elem.strip() for elem in form.html_included_elements.data.split(',') if + elem.strip()] + tenant.html_excluded_elements = [elem.strip() for elem in form.html_excluded_elements.data.split(',') if + elem.strip()] + db.session.commit() - print(session) flash('Tenant updated successfully.', 'success') if session.get('tenant'): if session['tenant'].get('id') == tenant_id: @@ -215,14 +199,17 @@ def handle_tenant_selection(): the_tenant = 
Tenant.query.get(tenant_id) session['tenant'] = the_tenant.to_dict() session['default_language'] = the_tenant.default_language - session['default_embedding_model'] = the_tenant.default_embedding_model - session['default_llm_model'] = the_tenant.default_llm_model + session['embedding_model'] = the_tenant.embedding_model + session['llm_model'] = the_tenant.llm_model action = request.form['action'] - if action == 'view_users': - return redirect(url_for('user_bp.view_users', tenant_id=tenant_id)) - elif action == 'edit_tenant': - return redirect(url_for('user_bp.edit_tenant', tenant_id=tenant_id)) + match action: + case 'view_users': + return redirect(url_for('user_bp.view_users', tenant_id=tenant_id)) + case 'edit_tenant': + return redirect(url_for('user_bp.edit_tenant', tenant_id=tenant_id)) + case 'select_tenant': + return redirect(url_for('basic_bp.session_defaults')) # Add more conditions for other actions return redirect(url_for('select_tenant'))