7 Commits

Author SHA1 Message Date
Josako
88f4db1178 - Organise retrievers 2024-11-01 11:19:55 +01:00
Josako
2df291ea91 - Organise retrievers 2024-11-01 11:19:34 +01:00
Josako
5841525b4c - When no explicit path is given in the browser, we automatically get redirected to the admin interface (eveai_app)
- Tuning moved to Retriever iso in the configuration, as this is an attribute that should be available for all types of Retrievers
2024-10-31 08:32:02 +01:00
Josako
532073d38e - Add dynamic fields to DocumentVersion in case the Catalog requires it. 2024-10-30 13:52:18 +01:00
Josako
43547287b1 - Refining & Enhancing dynamic fields
- Creating a specialized Form class for handling dynamic fields
- Refinement of HTML-macros to handle dynamic fields
- Introduction of dynamic fields for Catalogs
2024-10-29 09:17:44 +01:00
Josako
aa358df28e - Allowing for multiple types of Catalogs
- Introduction of retrievers
- Ensuring processing information is collected from Catalog iso Tenant
- Introduction of a generic Form class to enable dynamic fields based on a configuration
- Realisation of Retriever functionality to support dynamic fields
2024-10-25 14:11:47 +02:00
Josako
30fec27488 - Release script added to tag in both git and docker 2024-10-21 07:45:06 +02:00
33 changed files with 1348 additions and 170 deletions

View File

View File

View File

@@ -11,16 +11,22 @@ from common.utils.datetime_utils import get_date_in_timezone
from common.utils.model_utils import ModelVariables
class EveAIRetriever(BaseRetriever, BaseModel):
class EveAIDefaultRagRetriever(BaseRetriever, BaseModel):
_catalog_id: int = PrivateAttr()
_model_variables: ModelVariables = PrivateAttr()
_tenant_info: Dict[str, Any] = PrivateAttr()
def __init__(self, model_variables: ModelVariables, tenant_info: Dict[str, Any]):
def __init__(self, catalog_id: int, model_variables: ModelVariables, tenant_info: Dict[str, Any]):
super().__init__()
current_app.logger.debug(f'Model variables type: {type(model_variables)}')
self._catalog_id = catalog_id
self._model_variables = model_variables
self._tenant_info = tenant_info
@property
def catalog_id(self) -> int:
return self._catalog_id
@property
def model_variables(self) -> ModelVariables:
return self._model_variables
@@ -100,7 +106,8 @@ class EveAIRetriever(BaseRetriever, BaseModel):
.filter(
or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date),
(1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold
(1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold,
Document.catalog_id == self._catalog_id
)
.order_by(desc('similarity'))
.limit(k)

View File

@@ -0,0 +1,154 @@
from langchain_core.retrievers import BaseRetriever
from sqlalchemy import func, and_, or_, desc, cast, JSON
from sqlalchemy.exc import SQLAlchemyError
from pydantic import BaseModel, Field, PrivateAttr
from typing import Any, Dict, List, Optional
from flask import current_app
from contextlib import contextmanager
from common.extensions import db
from common.models.document import Document, DocumentVersion, Catalog
from common.utils.datetime_utils import get_date_in_timezone
from common.utils.model_utils import ModelVariables
class EveAIDossierRetriever(BaseRetriever, BaseModel):
    """Similarity retriever for one catalog with optional metadata filtering.

    Searches the embeddings that belong to the latest version of every
    currently valid document in the configured catalog.  While a
    ``filtering(...)`` block is active, the search is further restricted to
    document versions whose ``user_metadata`` matches the supplied filters.
    """

    _catalog_id: int = PrivateAttr()
    _model_variables: ModelVariables = PrivateAttr()
    _tenant_info: Dict[str, Any] = PrivateAttr()
    _active_filters: Optional[Dict[str, Any]] = PrivateAttr()

    def __init__(self, catalog_id: int, model_variables: ModelVariables, tenant_info: Dict[str, Any]):
        """Bind the retriever to a catalog.

        Args:
            catalog_id: Only documents with this ``Document.catalog_id`` are searched.
            model_variables: Looked up by key for ``embedding_model``,
                ``embedding_db_model``, ``similarity_threshold``, ``k`` and
                ``rag_tuning``.
            tenant_info: Looked up by key for ``timezone``.
        """
        super().__init__()
        self._catalog_id = catalog_id
        self._model_variables = model_variables
        self._tenant_info = tenant_info
        self._active_filters = None

    @contextmanager
    def filtering(self, metadata_filters: Dict[str, Any]):
        """Context manager for temporarily setting metadata filters.

        The previously active filters are restored on exit, also when the
        body raises, so blocks can be nested.
        """
        previous_filters = self._active_filters
        self._active_filters = metadata_filters
        try:
            yield self
        finally:
            self._active_filters = previous_filters

    def _build_metadata_filter_conditions(self, query):
        """Add the active metadata filters to ``query`` and return it.

        Every filter entry becomes one condition on
        ``DocumentVersion.user_metadata``; all conditions must hold (AND).
        A list or tuple value matches when the stored value equals any of
        its members (OR).  Entries whose value is ``None`` and empty
        lists/tuples add no condition.
        """
        if not self._active_filters:
            return query

        conditions = []
        for field, value in self._active_filters.items():
            if value is None:
                continue

            # Compare on the text form of the JSONB value (the ``->>`` operator).
            # Casting that text to JSON, as was done before, fails for plain
            # strings and PostgreSQL has no equality operator for ``json``.
            # NOTE(review): str(True) is 'True' while JSON text is 'true' -
            # boolean filters will not match; confirm whether they are needed.
            metadata_text = DocumentVersion.user_metadata[field].astext

            if isinstance(value, (list, tuple)):
                candidates = [str(val) for val in value]
                if candidates:
                    conditions.append(metadata_text.in_(candidates))
            else:
                conditions.append(metadata_text == str(value))

        if conditions:
            query = query.filter(and_(*conditions))
        return query

    def _get_relevant_documents(self, query: str):
        """Return the chunks most similar to ``query`` as formatted strings.

        Returns:
            A list with at most ``k`` strings, ordered by descending
            similarity, each made of a ``SOURCE: <embedding row id>`` header
            followed by the chunk text.  An empty list is returned when the
            database raises ``SQLAlchemyError`` (the session is rolled back).
        """
        current_app.logger.debug(f'Retrieving relevant documents for dossier query: {query}')
        if self._active_filters:
            current_app.logger.debug(f'Using metadata filters: {self._active_filters}')

        query_embedding = self._get_query_embedding(query)
        db_class = self.model_variables['embedding_db_model']
        similarity_threshold = self.model_variables['similarity_threshold']
        k = self.model_variables['k']

        try:
            current_date = get_date_in_timezone(self.tenant_info['timezone'])

            # Subquery to find the latest version of each document
            subquery = (
                db.session.query(
                    DocumentVersion.doc_id,
                    func.max(DocumentVersion.id).label('latest_version_id')
                )
                .group_by(DocumentVersion.doc_id)
                .subquery()
            )

            # Build base query
            query_obj = (
                db.session.query(db_class,
                                 (1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity'))
                .join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
                .join(Document, DocumentVersion.doc_id == Document.id)
                .join(subquery, DocumentVersion.id == subquery.c.latest_version_id)
                .filter(
                    or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
                    or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date),
                    (1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold,
                    Document.catalog_id == self._catalog_id
                )
            )

            # Apply metadata filters
            query_obj = self._build_metadata_filter_conditions(query_obj)

            # Order and limit results
            query_obj = query_obj.order_by(desc('similarity')).limit(k)

            # Debug logging for RAG tuning if enabled
            if self.model_variables['rag_tuning']:
                self._log_rag_tuning(query_obj, query_embedding)

            res = query_obj.all()

            result = []
            for doc in res:
                if self.model_variables['rag_tuning']:
                    current_app.logger.debug(f'Document ID: {doc[0].id} - Distance: {doc[1]}\n')
                    current_app.logger.debug(f'Chunk: \n {doc[0].chunk}\n\n')
                result.append(f'SOURCE: {doc[0].id}\n\n{doc[0].chunk}\n\n')

        except SQLAlchemyError as e:
            current_app.logger.error(f'Error retrieving relevant documents: {e}')
            db.session.rollback()
            return []

        return result

    def _log_rag_tuning(self, query_obj, query_embedding):
        """Log debug information for RAG tuning (statement and active filters)."""
        current_app.rag_tuning_logger.debug("Debug: Query execution plan:")
        current_app.rag_tuning_logger.debug(f"{query_obj.statement}")
        if self._active_filters:
            current_app.rag_tuning_logger.debug("Debug: Active metadata filters:")
            current_app.rag_tuning_logger.debug(f"{self._active_filters}")

    def _get_query_embedding(self, query: str):
        """Get embedding for the query text using the configured embedding model."""
        embedding_model = self.model_variables['embedding_model']
        query_embedding = embedding_model.embed_query(query)
        return query_embedding

    @property
    def catalog_id(self) -> int:
        """Id of the catalog this retriever searches."""
        return self._catalog_id

    @property
    def model_variables(self) -> ModelVariables:
        """Model variables supplied at construction."""
        return self._model_variables

    @property
    def tenant_info(self) -> Dict[str, Any]:
        """Tenant information supplied at construction."""
        return self._tenant_info

View File

@@ -0,0 +1,40 @@
from pydantic import BaseModel, PrivateAttr
from typing import Dict, Any
from common.utils.model_utils import ModelVariables
class EveAIRetriever(BaseModel):
    """Common base for retrievers: holds the catalog id, metadata and configuration.

    The values are stored as pydantic private attributes and exposed through
    read-only properties.
    """

    _catalog_id: int = PrivateAttr()
    _user_metadata: Dict[str, Any] = PrivateAttr()
    _system_metadata: Dict[str, Any] = PrivateAttr()
    _configuration: Dict[str, Any] = PrivateAttr()
    # The three attributes below are not assigned by __init__.  Without a
    # default, reading them before a subclass sets them raises AttributeError,
    # so each gets a neutral default.
    _tenant_info: Dict[str, Any] = PrivateAttr(default_factory=dict)
    _model_variables: ModelVariables = PrivateAttr(default=None)  # None until a subclass assigns it
    _tuning: bool = PrivateAttr(default=False)

    def __init__(self, catalog_id: int, user_metadata: Dict[str, Any], system_metadata: Dict[str, Any],
                 configuration: Dict[str, Any]):
        """Store the catalog id, user/system metadata and configuration."""
        super().__init__()
        self._catalog_id = catalog_id
        self._user_metadata = user_metadata
        self._system_metadata = system_metadata
        self._configuration = configuration

    @property
    def catalog_id(self) -> int:
        """Catalog id supplied at construction."""
        return self._catalog_id

    @property
    def user_metadata(self) -> Dict[str, Any]:
        """User metadata supplied at construction."""
        return self._user_metadata

    @property
    def system_metadata(self) -> Dict[str, Any]:
        """System metadata supplied at construction."""
        return self._system_metadata

    @property
    def configuration(self) -> Dict[str, Any]:
        """Configuration supplied at construction."""
        return self._configuration

    # Any common methods that should be shared among retrievers can go here.

View File

@@ -10,6 +10,7 @@ class Catalog(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(50), nullable=False)
description = db.Column(db.Text, nullable=True)
type = db.Column(db.String(50), nullable=False, default="DEFAULT_CATALOG")
# Embedding variables
html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
@@ -21,17 +22,37 @@ class Catalog(db.Model):
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
# Embedding search variables ==> move to specialist?
es_k = db.Column(db.Integer, nullable=True, default=8)
es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.4)
# Chat variables ==> Move to Specialist?
chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
# Tuning enablers
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
rag_tuning = db.Column(db.Boolean, nullable=True, default=False) # Move to Specialist?
# Meta Data
user_metadata = db.Column(JSONB, nullable=True)
system_metadata = db.Column(JSONB, nullable=True)
configuration = db.Column(JSONB, nullable=True)
# Versioning Information
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Retriever(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(50), nullable=False)
description = db.Column(db.Text, nullable=True)
catalog_id = db.Column(db.Integer, db.ForeignKey('catalog.id'), nullable=True)
type = db.Column(db.String(50), nullable=False, default="DEFAULT_RAG")
tuning = db.Column(db.Boolean, nullable=True, default=False)
# Meta Data
user_metadata = db.Column(JSONB, nullable=True)
system_metadata = db.Column(JSONB, nullable=True)
configuration = db.Column(JSONB, nullable=True)
# Versioning Information
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
@@ -74,6 +95,7 @@ class DocumentVersion(db.Model):
system_context = db.Column(db.Text, nullable=True)
user_metadata = db.Column(JSONB, nullable=True)
system_metadata = db.Column(JSONB, nullable=True)
catalog_properties = db.Column(JSONB, nullable=True)
# Versioning Information
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())

View File

@@ -27,6 +27,7 @@ def create_document_stack(api_input, file, filename, extension, tenant_id):
api_input.get('language', 'en'),
api_input.get('user_context', ''),
api_input.get('user_metadata'),
api_input.get('catalog_properties')
)
db.session.add(new_doc_vers)
@@ -63,7 +64,7 @@ def create_document(form, filename, catalog_id):
return new_doc
def create_version_for_document(document, tenant_id, url, language, user_context, user_metadata):
def create_version_for_document(document, tenant_id, url, language, user_context, user_metadata, catalog_properties):
new_doc_vers = DocumentVersion()
if url != '':
new_doc_vers.url = url
@@ -79,6 +80,9 @@ def create_version_for_document(document, tenant_id, url, language, user_context
if user_metadata != '' and user_metadata is not None:
new_doc_vers.user_metadata = user_metadata
if catalog_properties != '' and catalog_properties is not None:
new_doc_vers.catalog_properties = catalog_properties
new_doc_vers.document = document
set_logging_information(new_doc_vers, dt.now(tz.utc))
@@ -273,9 +277,10 @@ def edit_document(document_id, name, valid_from, valid_to):
return None, str(e)
def edit_document_version(version_id, user_context):
def edit_document_version(version_id, user_context, catalog_properties):
doc_vers = DocumentVersion.query.get_or_404(version_id)
doc_vers.user_context = user_context
doc_vers.catalog_properties = catalog_properties
update_logging_information(doc_vers, dt.now(tz.utc))
try:
@@ -300,6 +305,7 @@ def refresh_document_with_info(doc_id, tenant_id, api_input):
api_input.get('language', old_doc_vers.language),
api_input.get('user_context', old_doc_vers.user_context),
api_input.get('user_metadata', old_doc_vers.user_metadata),
api_input.get('catalog_properties', old_doc_vers.catalog_properties),
)
set_logging_information(new_doc_vers, dt.now(tz.utc))
@@ -337,7 +343,8 @@ def refresh_document(doc_id, tenant_id):
api_input = {
'language': old_doc_vers.language,
'user_context': old_doc_vers.user_context,
'user_metadata': old_doc_vers.user_metadata
'user_metadata': old_doc_vers.user_metadata,
'catalog_properties': old_doc_vers.catalog_properties,
}
return refresh_document_with_info(doc_id, tenant_id, api_input)
@@ -348,3 +355,5 @@ def mark_tenant_storage_dirty(tenant_id):
tenant = db.session.query(Tenant).filter_by(id=int(tenant_id)).first()
tenant.storage_dirty = True
db.session.commit()

53
config/catalog_types.py Normal file
View File

@@ -0,0 +1,53 @@
# Catalog Types
#
# Registry of catalog types, keyed by type identifier.  Each entry carries a
# display "name", a "Description" and a "configuration" mapping that describes
# the configurable options of that catalog type (empty for "DEFAULT").
#
# NOTE(review): the Catalog model's `type` column defaults to "DEFAULT_CATALOG",
# which is not a key of this mapping - confirm which identifier is intended.
CATALOG_TYPES = {
    "DEFAULT": {
        "name": "Default Catalog",
        # NOTE(review): this key is capitalised ("Description") while the nested
        # option definitions below use lower-case "description" - confirm that
        # consumers expect this exact spelling before normalising it.
        "Description": "A Catalog with information in Evie's Library, to be considered as a whole",
        "configuration": {}
    },
    "DOSSIER": {
        "name": "Dossier Catalog",
        "Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
        "configuration": {
            # Single option: a user-defined set of metadata fields used for tagging documents.
            "tagging_fields": {
                "name": "Tagging Fields",
                "type": "tagging_fields",
                "description": """Define the metadata fields that will be used for tagging documents.
Each field must have:
- type: one of 'string', 'integer', 'float', 'date', 'enum'
- required: boolean indicating if the field is mandatory
- description: field description
- allowed_values: list of values (for enum type only)
- min_value/max_value: range limits (for numeric types only)""",
                "required": True,
                "default": {},
                # Properties each individual tagging field definition may carry.
                "field_properties": {
                    "type": {
                        "allowed_values": ["string", "integer", "float", "date", "enum"],
                        "required": True
                    },
                    "required": {
                        "type": "boolean",
                        "default": False
                    },
                    "description": {
                        "type": "string"
                    },
                    "allowed_values": {
                        "type": "list",
                        "description": "For enum type fields only"
                    },
                    "min_value": {
                        "type": "number",
                        "description": "For numeric fields only"
                    },
                    "max_value": {
                        "type": "number",
                        "description": "For numeric fields only"
                    }
                }
            }
        },
        # Presumably the configuration entries that also apply per document
        # version - TODO confirm against the code that reads this key.
        "document_version_configurations": ["tagging_fields"]
    },
}

23
config/retriever_types.py Normal file
View File

@@ -0,0 +1,23 @@
# Retriever Types

# Configurable options of the default RAG retriever, keyed by option name.
_DEFAULT_RAG_CONFIGURATION = {
    "es_k": dict(
        name="es_k",
        type="int",
        description="K-value to retrieve embeddings (max embeddings retrieved)",
        required=True,
        default=8,
    ),
    "es_similarity_threshold": dict(
        name="es_similarity_threshold",
        type="float",
        description="Similarity threshold for retrieving embeddings",
        required=True,
        default=0.3,
    ),
}

# Registry of retriever types, keyed by type identifier.
RETRIEVER_TYPES = {
    "DEFAULT_RAG": dict(
        name="Default RAG",
        description="Retrieving all embeddings conform the query",
        configuration=_DEFAULT_RAG_CONFIGURATION,
    ),
}

View File

@@ -169,4 +169,5 @@ for SERVICE in "${SERVICES[@]}"; do
fi
done
echo "All specified services processed."
echo -e "\033[35mAll specified services processed.\033[0m"
echo -e "\033[35mFinished at $(date +"%d/%m/%Y %H:%M:%S")\033[0m"

26
docker/release_and_tag_eveai.sh Normal file → Executable file
View File

@@ -31,24 +31,25 @@ fi
# Path to your docker-compose file
DOCKER_COMPOSE_FILE="compose_dev.yaml"
# Get all the services defined in the docker-compose file
SERVICES=$(docker-compose -f $DOCKER_COMPOSE_FILE config --services)
# Get all the images defined in docker-compose
IMAGES=$(docker compose -f $DOCKER_COMPOSE_FILE config | grep 'image:' | awk '{ print $2 }')
# Tag and push images for all services that belong to your Docker account
for SERVICE in $SERVICES; do
DOCKER_IMAGE="your-docker-repo/$SERVICE"
# Start tagging only relevant images
for DOCKER_IMAGE in $IMAGES; do
# Check if the image belongs to your Docker account and ends with :latest
if [[ $DOCKER_IMAGE == $DOCKER_ACCOUNT* && $DOCKER_IMAGE == *:latest ]]; then
# Remove the ":latest" tag to use the base image name
BASE_IMAGE=${DOCKER_IMAGE%:latest}
# Check if the image starts with your Docker account name
if [[ $DOCKER_IMAGE == $DOCKER_ACCOUNT* ]]; then
echo "Tagging Docker image for service: $SERVICE with version: $RELEASE_VERSION"
echo "Tagging Docker image: $BASE_IMAGE with version: $RELEASE_VERSION"
# Tag the 'latest' image with the new release version
docker tag $DOCKER_IMAGE:latest $DOCKER_IMAGE:$RELEASE_VERSION
docker tag $DOCKER_IMAGE $BASE_IMAGE:$RELEASE_VERSION
# Push the newly tagged image to Docker Hub
docker push $DOCKER_IMAGE:$RELEASE_VERSION
docker push $BASE_IMAGE:$RELEASE_VERSION
else
echo "Skipping service: $SERVICE (not part of Docker account $DOCKER_ACCOUNT)"
echo "Skipping image: $DOCKER_IMAGE (not part of $DOCKER_ACCOUNT or not tagged as latest)"
fi
done
@@ -57,4 +58,5 @@ echo "Tagging Git repository with version: $RELEASE_VERSION"
git tag -a v$RELEASE_VERSION -m "Release $RELEASE_VERSION: $RELEASE_MESSAGE"
git push origin v$RELEASE_VERSION
echo "Release process completed for version: $RELEASE_VERSION"
echo -e "\033[35mRelease process completed for version: $RELEASE_VERSION \033[0m"
echo -e "\033[35mFinished at $(date +"%d/%m/%Y %H:%M:%S")\033[0m"

View File

@@ -43,6 +43,9 @@ upload_parser.add_argument('valid_from', location='form', type=validate_date, re
help='Valid from date for the document (ISO format)')
upload_parser.add_argument('user_metadata', location='form', type=validate_json, required=False,
help='User metadata for the document (JSON format)')
# Optional JSON with catalog-specific properties; parsed by validate_json but not
# checked against the catalog's own requirements.
upload_parser.add_argument('catalog_properties', location='form', type=validate_json, required=False,
                           help='The catalog configuration to be passed along (JSON format). Validity against catalog '
                                'requirements is not checked, and is the responsibility of the calling client.')
add_document_response = document_ns.model('AddDocumentResponse', {
'message': fields.String(description='Status message'),
@@ -82,6 +85,7 @@ class AddDocument(Resource):
'user_context': args.get('user_context'),
'valid_from': args.get('valid_from'),
'user_metadata': args.get('user_metadata'),
'catalog_properties': args.get('catalog_properties'),
}
new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
@@ -111,7 +115,11 @@ add_url_model = document_ns.model('AddURL', {
'user_context': fields.String(required=False, description='User context for the document'),
'valid_from': fields.String(required=False, description='Valid from date for the document'),
'user_metadata': fields.String(required=False, description='User metadata for the document'),
'system_metadata': fields.String(required=False, description='System metadata for the document')
'system_metadata': fields.String(required=False, description='System metadata for the document'),
'catalog_properties': fields.String(required=False, description='The catalog configuration to be passed along (JSON '
'format). Validity is against catalog requirements '
'is not checked, and is the responsibility of the '
'calling client.'),
})
add_url_response = document_ns.model('AddURLResponse', {
@@ -148,6 +156,7 @@ class AddURL(Resource):
'user_context': args.get('user_context'),
'valid_from': args.get('valid_from'),
'user_metadata': args.get('user_metadata'),
'catalog_properties': args.get('catalog_properties'),
}
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
@@ -227,6 +236,7 @@ class DocumentResource(Resource):
edit_document_version_model = document_ns.model('EditDocumentVersion', {
'user_context': fields.String(required=True, description='New user context for the document version'),
'catalog_properties': fields.String(required=True, description='New catalog properties for the document version'),
})
@@ -239,7 +249,7 @@ class DocumentVersionResource(Resource):
def put(self, version_id):
"""Edit a document version"""
data = request.json
updated_version, error = edit_document_version(version_id, data['user_context'])
updated_version, error = edit_document_version(version_id, data['user_context'], data.get('catalog_properties'))
if updated_version:
return {'message': f'Document Version {updated_version.id} updated successfully'}, 200
else:
@@ -251,7 +261,8 @@ refresh_document_model = document_ns.model('RefreshDocument', {
'name': fields.String(required=False, description='New name for the document'),
'language': fields.String(required=False, description='Language of the document'),
'user_context': fields.String(required=False, description='User context for the document'),
'user_metadata': fields.Raw(required=False, description='User metadata for the document')
'user_metadata': fields.Raw(required=False, description='User metadata for the document'),
'catalog_properties': fields.Raw(required=False, description='Catalog properties for the document'),
})
@@ -268,7 +279,7 @@ class RefreshDocument(Resource):
current_app.logger.info(f'Refreshing document {document_id} for tenant {tenant_id}')
try:
new_version, result = refresh_document(document_id)
new_version, result = refresh_document(document_id, tenant_id)
if new_version:
return {
@@ -301,7 +312,7 @@ class RefreshDocumentWithInfo(Resource):
try:
api_input = request.json
new_version, result = refresh_document_with_info(document_id, api_input)
new_version, result = refresh_document_with_info(document_id, tenant_id, api_input)
if new_version:
return {

View File

@@ -11,9 +11,17 @@
{{ form.hidden_tag() }}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{% for field in form %}
{% for field in form.get_static_fields() %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% for collection_name, fields in form.get_dynamic_fields().items() %}
{% if fields|length > 0 %}
<h4 class="mt-4">{{ collection_name }}</h4>
{% endif %}
{% for field in fields %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% endfor %}
<button type="submit" class="btn btn-primary">Add Document</button>
</form>
{% endblock %}

View File

@@ -11,12 +11,22 @@ When you change chunking of embedding information, you'll need to manually refre
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{% for field in form %}
{% set disabled_fields = ['type'] %}
{% set exclude_fields = [] %}
<!-- Render Static Fields -->
{% for field in form.get_static_fields() %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<button type="submit" class="btn btn-primary">Register Catalog</button>
<!-- Render Dynamic Fields -->
{% for collection_name, fields in form.get_dynamic_fields().items() %}
{% if fields|length > 0 %}
<h4 class="mt-4">{{ collection_name }}</h4>
{% endif %}
{% for field in fields %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% endfor %}
<button type="submit" class="btn btn-primary">Save Catalog</button>
</form>
{% endblock %}

View File

@@ -0,0 +1,33 @@
{% extends 'base.html' %}
{# Edit form for a Retriever: renders the form's static fields, then every collection of dynamic fields under its own heading. #}
{% from "macros.html" import render_field %}
{% block title %}Edit Retriever{% endblock %}
{% block content_title %}Edit Retriever{% endblock %}
{% block content_description %}Edit a Retriever (for a Catalog){% endblock %}
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{# The 'type' field is passed to render_field as disabled; no fields are excluded. #}
{% set disabled_fields = ['type'] %}
{% set exclude_fields = [] %}
<!-- Render Static Fields -->
{% for field in form.get_static_fields() %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<!-- Render Dynamic Fields -->
{# get_dynamic_fields() is iterated as a mapping of collection name to its fields; the heading is skipped for empty collections. #}
{% for collection_name, fields in form.get_dynamic_fields().items() %}
{% if fields|length > 0 %}
<h4 class="mt-4">{{ collection_name }}</h4>
{% endif %}
{% for field in fields %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% endfor %}
<button type="submit" class="btn btn-primary">Save Retriever</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -0,0 +1,23 @@
{% extends 'base.html' %}
{# Registration form for a new Retriever: renders every field of the form through the render_field macro. #}
{% from "macros.html" import render_field %}
{% block title %}Retriever Registration{% endblock %}
{% block content_title %}Register Retriever{% endblock %}
{% block content_description %}Define a new retriever (for a catalog){% endblock %}
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{# No fields are disabled or excluded on registration. #}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{% for field in form %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<button type="submit" class="btn btn-primary">Register Retriever</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -0,0 +1,23 @@
{% extends 'base.html' %}
{# Overview of retrievers: a selectable table posting the chosen row to handle_retriever_selection, with pagination in the footer. #}
{% from 'macros.html' import render_selectable_table, render_pagination %}
{% block title %}Retrievers{% endblock %}
{% block content_title %}Retrievers{% endblock %}
{% block content_description %}View Retrievers for Tenant{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %}
<div class="container">
<form method="POST" action="{{ url_for('document_bp.handle_retriever_selection') }}">
{# NOTE(review): the table id is spelled "retrieverssTable" (double s); left unchanged in case scripts or styles reference it - confirm before renaming. #}
{{ render_selectable_table(headers=["Retriever ID", "Name", "Type", "Catalog ID"], rows=rows, selectable=True, id="retrieverssTable") }}
<div class="form-group mt-3">
<button type="submit" name="action" value="edit_retriever" class="btn btn-primary">Edit Retriever</button>
</div>
</form>
</div>
{% endblock %}
{% block content_footer %}
{{ render_pagination(pagination, 'document_bp.retrievers') }}
{% endblock %}

View File

@@ -1,18 +1,136 @@
{% macro render_field(field, disabled_fields=[], exclude_fields=[], class='') %}
<!--{% macro render_field(field, disabled_fields=[], exclude_fields=[], class='') %}-->
<!-- {% set disabled = field.name in disabled_fields %}-->
<!-- {% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %}-->
<!-- {% if field.name not in exclude_fields %}-->
<!-- {% if field.type == 'BooleanField' %}-->
<!-- <div class="form-check">-->
<!-- {{ field(class="form-check-input " + class, type="checkbox", id="flexSwitchCheckDefault") }}-->
<!-- {{ field.label(class="form-check-label", for="flexSwitchCheckDefault", disabled=disabled) }}-->
<!-- </div>-->
<!-- {% else %}-->
<!-- <div class="form-group">-->
<!-- {{ field.label(class="form-label") }}-->
<!-- {{ field(class="form-control " + class, disabled=disabled) }}-->
<!-- {% if field.errors %}-->
<!-- <div class="invalid-feedback">-->
<!-- {% for error in field.errors %}-->
<!-- {{ error }}-->
<!-- {% endfor %}-->
<!-- </div>-->
<!-- {% endif %}-->
<!-- </div>-->
<!-- {% endif %}-->
<!-- {% endif %}-->
<!--{% endmacro %}-->
{% macro render_field_old(field, disabled_fields=[], exclude_fields=[], class='') %}
<!-- Debug info -->
<!-- Field name: {{ field.name }}, Field type: {{ field.__class__.__name__ }} -->
{% set disabled = field.name in disabled_fields %}
{% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %}
{% if field.name not in exclude_fields %}
{% if field.type == 'BooleanField' %}
<div class="form-check">
{{ field(class="form-check-input " + class, type="checkbox", id="flexSwitchCheckDefault") }}
{{ field.label(class="form-check-label", for="flexSwitchCheckDefault", disabled=disabled) }}
<div class="form-group">
<div class="form-check form-switch">
{{ field(class="form-check-input " + class, disabled=disabled) }}
{% if field.description %}
{{ field.label(class="form-check-label",
**{'data-bs-toggle': 'tooltip',
'data-bs-placement': 'right',
'title': field.description}) }}
{% else %}
{{ field.label(class="form-check-label") }}
{% endif %}
</div>
{% if field.errors %}
<div class="invalid-feedback d-block">
{% for error in field.errors %}
{{ error }}
{% endfor %}
</div>
{% endif %}
</div>
{% else %}
<div class="form-group">
{{ field.label(class="form-label") }}
{{ field(class="form-control " + class, disabled=disabled) }}
{% if field.description %}
{{ field.label(class="form-label",
**{'data-bs-toggle': 'tooltip',
'data-bs-placement': 'right',
'title': field.description}) }}
{% else %}
{{ field.label(class="form-label") }}
{% endif %}
{% if field.type == 'TextAreaField' and 'json-editor' in class %}
<div id="{{ field.id }}-editor" class="json-editor-container"></div>
{{ field(class="form-control d-none " + class, disabled=disabled) }}
{% else %}
{{ field(class="form-control " + class, disabled=disabled) }}
{% endif %}
{% if field.errors %}
<div class="invalid-feedback">
<div class="invalid-feedback d-block">
{% for error in field.errors %}
{{ error }}
{% endfor %}
</div>
{% endif %}
</div>
{% endif %}
{% endif %}
{% endmacro %}
{% macro render_field(field, disabled_fields=[], exclude_fields=[], class='') %}
<!-- Debug info -->
<!-- Field name: {{ field.name }}, Field type: {{ field.__class__.__name__ }} -->
{% set disabled = field.name in disabled_fields %}
{% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %}
{% if field.name not in exclude_fields %}
{% if field.type == 'BooleanField' %}
<div class="form-group">
<div class="form-check form-switch">
{{ field(class="form-check-input " + class, disabled=disabled) }}
{% if field.description %}
{{ field.label(class="form-check-label",
**{'data-bs-toggle': 'tooltip',
'data-bs-placement': 'right',
'title': field.description}) }}
{% else %}
{{ field.label(class="form-check-label") }}
{% endif %}
</div>
{% if field.errors %}
<div class="invalid-feedback d-block">
{% for error in field.errors %}
{{ error }}
{% endfor %}
</div>
{% endif %}
</div>
{% else %}
<div class="form-group">
{% if field.description %}
{{ field.label(class="form-label",
**{'data-bs-toggle': 'tooltip',
'data-bs-placement': 'right',
'title': field.description}) }}
{% else %}
{{ field.label(class="form-label") }}
{% endif %}
{% if field.type == 'TextAreaField' and 'json-editor' in class %}
<div id="{{ field.id }}-editor" class="json-editor-container"></div>
{{ field(class="form-control d-none " + class, disabled=disabled) }}
{% elif field.type == 'SelectField' %}
{{ field(class="form-control form-select " + class, disabled=disabled) }}
{% else %}
{{ field(class="form-control " + class, disabled=disabled) }}
{% endif %}
{% if field.errors %}
<div class="invalid-feedback d-block">
{% for error in field.errors %}
{{ error }}
{% endfor %}

View File

@@ -83,6 +83,8 @@
{{ dropdown('Document Mgmt', 'note_stack', [
{'name': 'Add Catalog', 'url': '/document/catalog', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'All Catalogs', 'url': '/document/catalogs', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add Retriever', 'url': '/document/retriever', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'All Retrievers', 'url': '/document/retrievers', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']},

View File

@@ -14,4 +14,58 @@
{# The src attribute must be closed before type=, otherwise the browser folds `type=` into the URL. #}
<script src="{{url_for('static', filename='assets/js/material-kit-pro.min.js')}}?v=3.0.4" type="text/javascript"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.3/js/bootstrap.bundle.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/select2/4.0.13/js/select2.min.js"></script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/jsoneditor/10.1.0/jsoneditor.min.css" rel="stylesheet" type="text/css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/jsoneditor/10.1.0/jsoneditor.min.js"></script>
<script>
    document.addEventListener('DOMContentLoaded', function() {
        // Initialize a Bootstrap tooltip on every element that opts in via data-bs-toggle="tooltip".
        document.querySelectorAll('[data-bs-toggle="tooltip"]').forEach(function(tooltipTriggerEl) {
            new bootstrap.Tooltip(tooltipTriggerEl);
        });

        // Initialize a JSON editor for every textarea carrying the json-editor class.
        document.querySelectorAll('.json-editor').forEach(function(textarea) {
            // The editor is mounted in the sibling container "<textarea id>-editor".
            var container = document.getElementById(textarea.id + '-editor');
            if (!container) {
                // Without a container JSONEditor would throw and abort the setup of all
                // remaining editors on the page, so skip this textarea instead.
                console.warn('No JSON editor container found for textarea:', textarea.id);
                return;
            }

            // Reflect the current validation state in the container border.
            function refreshValidationIndicator() {
                editor.validate().then(function(errors) {
                    container.style.border = errors.length ? '2px solid red' : '1px solid #ccc';
                });
            }

            var editor = new JSONEditor(container, {
                mode: 'code',
                modes: ['code', 'tree'],
                onChangeText: function(jsonString) {
                    // Keep the hidden textarea in sync so the value is submitted with the form.
                    textarea.value = jsonString;
                    // Re-validate on every change so the border does not go stale.
                    refreshValidationIndicator();
                }
            });

            // Set the initial value; fall back to an empty object on unparsable content.
            try {
                const initialValue = textarea.value ? JSON.parse(textarea.value) : {};
                editor.set(initialValue);
            } catch (e) {
                console.error('Error parsing initial JSON:', e);
                editor.set({});
            }

            refreshValidationIndicator();
        });
    });
</script>
<style>
.json-editor-container {
height: 400px;
margin-bottom: 1rem;
}
.tooltip {
position: fixed;
}
</style>

View File

@@ -1,4 +1,4 @@
from flask import session, current_app
from flask import session, current_app, request
from flask_wtf import FlaskForm
from wtforms import (StringField, BooleanField, SubmitField, DateField, IntegerField, FloatField, SelectMultipleField,
SelectField, FieldList, FormField, TextAreaField, URLField)
@@ -6,6 +6,15 @@ from wtforms.validators import DataRequired, Length, Optional, URL, ValidationEr
from flask_wtf.file import FileField, FileAllowed, FileRequired
import json
from wtforms_sqlalchemy.fields import QuerySelectField
from common.extensions import db
from common.models.document import Catalog
from config.catalog_types import CATALOG_TYPES
from config.retriever_types import RETRIEVER_TYPES
from .dynamic_form_base import DynamicFormBase
def allowed_file(form, field):
if field.data:
@@ -26,34 +35,123 @@ def validate_json(form, field):
class CatalogForm(FlaskForm):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Select Field for Catalog Type (Uses the CATALOG_TYPES defined in config)
type = SelectField('Catalog Type', validators=[DataRequired()])
# Metadata fields
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
configuration = TextAreaField('Configuration', validators=[Optional(), validate_json])
# HTML Embedding Variables
html_tags = StringField('HTML Tags', validators=[DataRequired()],
default='p, h1, h2, h3, h4, h5, h6, li, , tbody, tr, td')
html_end_tags = StringField('HTML End Tags', validators=[DataRequired()],
default='p, li')
html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
html_included_elements = StringField('HTML Included Elements', validators=[Optional()], default='article, main')
html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()],
default='header, footer, nav, script')
html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])
min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()],
default=2000)
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
default=3000)
# Embedding Search variables
es_k = IntegerField('Limit for Searching Embeddings (5)',
default=5,
validators=[NumberRange(min=0)])
es_similarity_threshold = FloatField('Similarity Threshold for Searching Embeddings (0.5)',
default=0.5,
validators=[NumberRange(min=0, max=1)])
# Chat Variables
chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)])
chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)])
# Tuning variables
embed_tuning = BooleanField('Enable Embedding Tuning', default=False)
rag_tuning = BooleanField('Enable RAG Tuning', default=False)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Dynamically populate the 'type' field using the constructor
self.type.choices = [(key, value['name']) for key, value in CATALOG_TYPES.items()]
class AddDocumentForm(FlaskForm):
class EditCatalogForm(DynamicFormBase):
    """Form for editing an existing Catalog.

    Declares the static catalog attributes. Because the form derives from
    DynamicFormBase, type-specific fields can be attached to an instance at
    runtime with add_dynamic_fields().
    """
    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
    description = TextAreaField('Description', validators=[Optional()])

    # Catalog type, rendered as a read-only text input (not a select) on the edit form
    type = StringField('Catalog Type', validators=[DataRequired()], render_kw={'readonly': True})

    # Metadata fields (free-form JSON, checked by validate_json)
    user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
    system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json],)

    # HTML Embedding Variables (entered as comma-separated strings)
    html_tags = StringField('HTML Tags', validators=[DataRequired()],
                            default='p, h1, h2, h3, h4, h5, h6, li, , tbody, tr, td')
    html_end_tags = StringField('HTML End Tags', validators=[DataRequired()],
                                default='p, li')
    html_included_elements = StringField('HTML Included Elements', validators=[Optional()], default='article, main')
    html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()],
                                         default='header, footer, nav, script')
    html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])

    # Chunk size bounds; both are optional and must be non-negative when given
    min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()],
                                  default=2000)
    max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
                                  default=3000)

    # Chat Variables (temperatures constrained to the range 0..1)
    chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)])
    chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)])

    # Tuning variables
    embed_tuning = BooleanField('Enable Embedding Tuning', default=False)
class RetrieverForm(FlaskForm):
    """Form for creating a new Retriever.

    The available retriever types are read from RETRIEVER_TYPES each time the
    form is instantiated.
    """
    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
    description = TextAreaField('Description', validators=[Optional()])

    # Optional catalog the retriever operates on; options are queried lazily per request
    catalog = QuerySelectField(
        'Catalog ID',
        validators=[Optional()],
        query_factory=lambda: Catalog.query.all(),
        get_label='name',
        allow_blank=True,
    )

    # Retriever type; the choices are filled in by __init__
    type = SelectField('Retriever Type', validators=[DataRequired()])
    tuning = BooleanField('Enable Tuning', default=False)

    # Metadata fields (free-form JSON, checked by validate_json)
    user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
    system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Build the (value, label) pairs for the type dropdown from the configuration
        type_choices = []
        for type_key, type_definition in RETRIEVER_TYPES.items():
            type_choices.append((type_key, type_definition['name']))
        self.type.choices = type_choices
class EditRetrieverForm(DynamicFormBase):
    """Form for editing an existing Retriever.

    Declares the static retriever attributes. Because the form derives from
    DynamicFormBase, type-specific configuration fields can be attached to an
    instance at runtime with add_dynamic_fields().
    """
    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
    description = TextAreaField('Description', validators=[Optional()])

    # Optional catalog the retriever operates on; options are queried lazily per request
    catalog = QuerySelectField(
        'Catalog ID',
        query_factory=lambda: Catalog.query.all(),
        allow_blank=True,
        get_label='name',
        validators=[Optional()],
    )

    # The retriever type must not change on edit. An HTML <select> ignores the
    # `readonly` attribute, so the type is rendered as a read-only text input,
    # in the same way EditCatalogForm renders the catalog type.
    type = StringField('Retriever Type', validators=[DataRequired()], render_kw={'readonly': True})
    tuning = BooleanField('Enable Tuning', default=False)

    # Metadata fields (free-form JSON, checked by validate_json)
    user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
    system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
class AddDocumentForm(DynamicFormBase):
file = FileField('File', validators=[FileRequired(), allowed_file])
name = StringField('Name', validators=[Length(max=100)])
language = SelectField('Language', choices=[], validators=[Optional()])
@@ -61,17 +159,15 @@ class AddDocumentForm(FlaskForm):
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
submit = SubmitField('Submit')
def __init__(self):
super().__init__()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.language.choices = [(language, language) for language in
session.get('tenant').get('allowed_languages')]
if not self.language.data:
self.language.data = session.get('tenant').get('default_language')
class AddURLForm(FlaskForm):
class AddURLForm(DynamicFormBase):
url = URLField('URL', validators=[DataRequired(), URL()])
name = StringField('Name', validators=[Length(max=100)])
language = SelectField('Language', choices=[], validators=[Optional()])
@@ -79,10 +175,8 @@ class AddURLForm(FlaskForm):
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
submit = SubmitField('Submit')
def __init__(self):
super().__init__()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.language.choices = [(language, language) for language in
session.get('tenant').get('allowed_languages')]
if not self.language.data:
@@ -114,7 +208,7 @@ class EditDocumentForm(FlaskForm):
submit = SubmitField('Submit')
class EditDocumentVersionForm(FlaskForm):
class EditDocumentVersionForm(DynamicFormBase):
language = StringField('Language')
user_context = TextAreaField('User Context', validators=[Optional()])
system_context = TextAreaField('System Context', validators=[Optional()])

View File

@@ -14,7 +14,7 @@ from urllib.parse import urlparse, unquote
import io
import json
from common.models.document import Document, DocumentVersion, Catalog
from common.models.document import Document, DocumentVersion, Catalog, Retriever
from common.extensions import db, minio_client
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
process_multiple_urls, get_documents_list, edit_document, \
@@ -22,13 +22,15 @@ from common.utils.document_utils import validate_file_type, create_document_stac
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
EveAIDoubleURLException
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm, \
CatalogForm
CatalogForm, EditCatalogForm, RetrieverForm, EditRetrieverForm
from common.utils.middleware import mw_before_request
from common.utils.celery_utils import current_celery
from common.utils.nginx_utils import prefixed_url_for
from common.utils.view_assistants import form_validation_failed, prepare_table_for_macro, form_to_dict
from .document_list_view import DocumentListView
from .document_version_list_view import DocumentVersionListView
from config.catalog_types import CATALOG_TYPES
from config.retriever_types import RETRIEVER_TYPES
document_bp = Blueprint('document_bp', __name__, url_prefix='/document')
@@ -103,7 +105,7 @@ def catalogs():
the_catalogs = pagination.items
# prepare table data
rows = prepare_table_for_macro(the_catalogs, [('id', ''), ('name', '')])
rows = prepare_table_for_macro(the_catalogs, [('id', ''), ('name', ''), ('type', '')])
# Render the catalogs in a template
return render_template('document/catalogs.html', rows=rows, pagination=pagination)
@@ -132,9 +134,12 @@ def handle_catalog_selection():
@roles_accepted('Super User', 'Tenant Admin')
def edit_catalog(catalog_id):
catalog = Catalog.query.get_or_404(catalog_id)
form = CatalogForm(obj=catalog)
tenant_id = session.get('tenant').get('id')
form = EditCatalogForm(request.form, obj=catalog)
configuration_config = CATALOG_TYPES[catalog.type]["configuration"]
form.add_dynamic_fields("configuration", configuration_config, catalog.configuration)
# Convert arrays to comma-separated strings for display
if request.method == 'GET':
form.html_tags.data = ', '.join(catalog.html_tags or '')
@@ -155,6 +160,8 @@ def edit_catalog(catalog_id):
if form.html_excluded_elements.data else []
catalog.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \
if form.html_excluded_classes.data else []
catalog.configuration = form.get_dynamic_data('configuration')
update_logging_information(catalog, dt.now(tz.utc))
try:
db.session.add(catalog)
@@ -173,22 +180,150 @@ def edit_catalog(catalog_id):
return render_template('document/edit_catalog.html', form=form, catalog_id=catalog_id)
@document_bp.route('/retriever', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def retriever():
    """Create a new Retriever from the generic RetrieverForm.

    On a valid submission the retriever is stored and the user is redirected
    to the edit view, where the type-dependent configuration can be entered.
    If the database write fails, the transaction is rolled back and the form
    is rendered again with an error message.
    """
    form = RetrieverForm()

    if form.validate_on_submit():
        tenant_id = session.get('tenant').get('id')
        new_retriever = Retriever()
        form.populate_obj(new_retriever)
        # The catalog is optional on the form (allow_blank=True), so it may be None.
        new_retriever.catalog_id = form.catalog.data.id if form.catalog.data else None

        set_logging_information(new_retriever, dt.now(tz.utc))

        try:
            db.session.add(new_retriever)
            db.session.commit()
        except SQLAlchemyError as e:
            db.session.rollback()
            flash(f'Failed to add retriever. Error: {e}', 'danger')
            current_app.logger.error(f'Failed to add retriever {new_retriever.name} '
                                     f'for tenant {tenant_id}. Error: {str(e)}')
            # Nothing was stored, so there is no retriever to configure: show the form again.
            return render_template('document/retriever.html', form=form)

        flash('Retriever successfully added!', 'success')
        current_app.logger.info(f'Retriever {new_retriever.name} successfully added for tenant {tenant_id}!')
        # Enable step 2 of creation of retriever - add configuration of the retriever (dependent on type)
        return redirect(prefixed_url_for('document_bp.edit_retriever', retriever_id=new_retriever.id))

    return render_template('document/retriever.html', form=form)
@document_bp.route('/retriever/<int:retriever_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_retriever(retriever_id):
    """Edit an existing retriever, including its type-dependent configuration."""
    # Responds with 404 when the retriever does not exist
    retriever = Retriever.query.get_or_404(retriever_id)

    # Expose the Catalog object (or None) so the form's catalog field can be pre-selected
    if not retriever.catalog_id:
        retriever.catalog = None
    else:
        retriever.catalog = Catalog.query.get(retriever.catalog_id)

    # Static fields come from the form class; the configuration fields are driven by the type
    form = EditRetrieverForm(request.form, obj=retriever)
    type_configuration = RETRIEVER_TYPES[retriever.type]["configuration"]
    form.add_dynamic_fields("configuration", type_configuration, retriever.configuration)

    if request.method == 'POST':
        current_app.logger.debug(f'Received POST request with {request.form}')

    if not form.validate_on_submit():
        # Initial GET or invalid submission: (re)render the edit page
        form_validation_failed(request, form)
        current_app.logger.debug(f"Rendering Template for {retriever_id}")
        return render_template('document/edit_retriever.html', form=form, retriever_id=retriever_id)

    # Copy the submitted values onto the model
    form.populate_obj(retriever)
    retriever.configuration = form.get_dynamic_data('configuration')

    selected_catalog = form.catalog.data
    retriever.catalog_id = selected_catalog.id if selected_catalog else None

    update_logging_information(retriever, dt.now(tz.utc))

    # Persist; on failure roll back and stay on the edit page
    try:
        db.session.add(retriever)
        db.session.commit()
        flash('Retriever updated successfully!', 'success')
        current_app.logger.info(f'Retriever {retriever.id} updated successfully')
    except SQLAlchemyError as e:
        db.session.rollback()
        flash(f'Failed to update retriever. Error: {str(e)}', 'danger')
        current_app.logger.error(f'Failed to update retriever {retriever_id}. Error: {str(e)}')
        return render_template('document/edit_retriever.html', form=form, retriever_id=retriever_id)

    return redirect(prefixed_url_for('document_bp.retrievers'))
@document_bp.route('/retrievers', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def retrievers():
    """Show a paginated list of retrievers, ordered by id."""
    page_number = request.args.get('page', 1, type=int)
    page_size = request.args.get('per_page', 10, type=int)

    pagination = Retriever.query.order_by(Retriever.id).paginate(page=page_number, per_page=page_size)

    # Columns shown in the table
    columns = [('id', ''), ('name', ''), ('type', ''), ('catalog_id', '')]
    rows = prepare_table_for_macro(pagination.items, columns)

    # Render the retrievers in a template
    return render_template('document/retrievers.html', rows=rows, pagination=pagination)
@document_bp.route('/handle_retriever_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_retriever_selection():
    """Dispatch the action chosen for the row selected in the retrievers table."""
    # 'selected_row' is posted as the text form of a dict; its 'value' entry is used as the id
    selected_row = ast.literal_eval(request.form.get('selected_row'))
    retriever_id = selected_row.get('value')

    requested_action = request.form['action']
    if requested_action != 'edit_retriever':
        # Unknown action: fall back to the overview
        return redirect(prefixed_url_for('document_bp.retrievers'))

    return redirect(prefixed_url_for('document_bp.edit_retriever', retriever_id=retriever_id))
@document_bp.route('/add_document', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_document():
form = AddDocumentForm()
form = AddDocumentForm(request.form)
catalog_id = session.get('catalog_id', None)
if catalog_id is None:
flash('You need to set a Session Catalog before adding Documents or URLs')
return redirect(prefixed_url_for('document_bp.catalogs'))
catalog = Catalog.query.get_or_404(catalog_id)
if catalog.configuration and len(catalog.configuration) > 0:
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
form.add_dynamic_fields(config, catalog.configuration[config])
if form.validate_on_submit():
try:
current_app.logger.info(f'Adding Document for {catalog_id}')
tenant_id = session['tenant']['id']
catalog_id = session['catalog_id']
file = form.file.data
filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower()
validate_file_type(extension)
current_app.logger.debug(f'Language on form: {form.language.data}')
catalog_properties = {}
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
catalog_properties[config] = form.get_dynamic_data(config)
api_input = {
'catalog_id': catalog_id,
'name': form.name.data,
@@ -196,6 +331,7 @@ def add_document():
'user_context': form.user_context.data,
'valid_from': form.valid_from.data,
'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None,
'catalog_properties': catalog_properties,
}
current_app.logger.debug(f'Creating document stack with input {api_input}')
@@ -218,16 +354,30 @@ def add_document():
@document_bp.route('/add_url', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_url():
form = AddURLForm()
form = AddURLForm(request.form)
catalog_id = session.get('catalog_id', None)
if catalog_id is None:
flash('You need to set a Session Catalog before adding Documents or URLs')
return redirect(prefixed_url_for('document_bp.catalogs'))
catalog = Catalog.query.get_or_404(catalog_id)
if catalog.configuration and len(catalog.configuration) > 0:
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
form.add_dynamic_fields(config, catalog.configuration[config])
if form.validate_on_submit():
try:
tenant_id = session['tenant']['id']
catalog_id = session['catalog_id']
url = form.url.data
file_content, filename, extension = process_url(url, tenant_id)
catalog_properties = {}
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
catalog_properties[config] = form.get_dynamic_data(config)
api_input = {
'catalog_id': catalog_id,
'name': form.name.data or filename,
@@ -236,6 +386,7 @@ def add_url():
'user_context': form.user_context.data,
'valid_from': form.valid_from.data,
'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None,
'catalog_properties': catalog_properties,
}
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
@@ -256,43 +407,6 @@ def add_url():
return render_template('document/add_url.html', form=form)
@document_bp.route('/add_urls', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_urls():
form = AddURLsForm()
if form.validate_on_submit():
try:
tenant_id = session['tenant']['id']
urls = form.urls.data.split('\n')
urls = [url.strip() for url in urls if url.strip()]
api_input = {
'name': form.name.data,
'language': form.language.data,
'user_context': form.user_context.data,
'valid_from': form.valid_from.data
}
results = process_multiple_urls(urls, tenant_id, api_input)
for result in results:
if result['status'] == 'success':
flash(
f"Processed URL: {result['url']} - Document ID: {result['document_id']}, Version ID: {result['document_version_id']}",
'success')
else:
flash(f"Error processing URL: {result['url']} - {result['message']}", 'error')
return redirect(prefixed_url_for('document_bp.documents'))
except Exception as e:
current_app.logger.error(f'Error adding multiple URLs: {str(e)}')
flash('An error occurred while adding the URLs.', 'error')
return render_template('document/add_urls.html', form=form)
@document_bp.route('/documents', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def documents():
@@ -375,12 +489,29 @@ def edit_document_view(document_id):
@roles_accepted('Super User', 'Tenant Admin')
def edit_document_version_view(document_version_id):
doc_vers = DocumentVersion.query.get_or_404(document_version_id)
form = EditDocumentVersionForm(obj=doc_vers)
form = EditDocumentVersionForm(request.form, obj=doc_vers)
catalog_id = session.get('catalog_id', None)
if catalog_id is None:
flash('You need to set a Session Catalog before adding Documents or URLs')
return redirect(prefixed_url_for('document_bp.catalogs'))
catalog = Catalog.query.get_or_404(catalog_id)
if catalog.configuration and len(catalog.configuration) > 0:
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
form.add_dynamic_fields(config, catalog.configuration[config], doc_vers.catalog_properties[config])
if form.validate_on_submit():
catalog_properties = {}
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
catalog_properties[config] = form.get_dynamic_data(config)
updated_version, error = edit_document_version(
document_version_id,
form.user_context.data
form.user_context.data,
catalog_properties,
)
if updated_version:
flash(f'Document Version {updated_version.id} updated successfully', 'success')
@@ -545,49 +676,3 @@ def fetch_html(url):
response.raise_for_status() # Will raise an exception for bad requests
return response.content
# def prepare_document_data(docs):
# rows = []
# for doc in docs:
# doc_row = [{'value': doc.name, 'class': '', 'type': 'text'},
# {'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}]
# # Document basic details
# if doc.valid_from:
# doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'})
# else:
# doc_row.append({'value': '', 'class': '', 'type': 'text'})
#
# # Nested languages and versions
# languages_rows = []
# for lang in doc.languages:
# lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}]
#
# # Latest version details if available (should be available ;-) )
# if lang.latest_version:
# lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"),
# 'class': '', 'type': 'text'})
# if lang.latest_version.url:
# lang_row.append({'value': lang.latest_version.url,
# 'class': '', 'type': 'link', 'href': lang.latest_version.url})
# else:
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
#
# if lang.latest_version.object_name:
# lang_row.append({'value': lang.latest_version.object_name, 'class': '', 'type': 'text'})
# else:
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
#
# if lang.latest_version.file_type:
# lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'})
# else:
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
# # Include other details as necessary
#
# languages_rows.append(lang_row)
#
# doc_row.append({'is_group': True, 'colspan': '5',
# 'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
# 'sub_rows': languages_rows})
# rows.append(doc_row)
# return rows

View File

@@ -0,0 +1,216 @@
from flask_wtf import FlaskForm
from wtforms import IntegerField, FloatField, BooleanField, StringField, TextAreaField, validators, ValidationError
from flask import current_app
import json
from wtforms.fields.choices import SelectField
from wtforms.fields.datetime import DateField
class DynamicFormBase(FlaskForm):
    """FlaskForm that can be extended at runtime with configuration-driven fields.

    Dynamic fields are grouped in named collections. Each field is registered
    on the form under the name ``<collection_name>_<field_name>`` and can be
    read back per collection with get_dynamic_data().
    """

    def __init__(self, formdata=None, *args, **kwargs):
        # NOTE: formdata is not forwarded to FlaskForm.__init__; it is only kept
        # so that add_dynamic_fields() can bind submitted values later on.
        super(DynamicFormBase, self).__init__(*args, **kwargs)
        # Maps collection names to lists of (prefixed) field names
        self.dynamic_fields = {}
        # Store formdata for later use
        self.formdata = formdata

    def _create_field_validators(self, field_def):
        """Create the list of validators for a single field definition.

        'required' selects InputRequired over Optional. For 'integer' and
        'float' fields, 'min_value' / 'max_value' add a NumberRange validator.
        """
        validators_list = []

        # Required validator
        if field_def.get('required', False):
            validators_list.append(validators.InputRequired())
        else:
            validators_list.append(validators.Optional())

        # Type-specific validators
        field_type = field_def.get('type')
        if field_type in ['integer', 'float']:
            min_value = field_def.get('min_value')
            max_value = field_def.get('max_value')
            if min_value is not None or max_value is not None:
                # Compare against None explicitly: 0 is a legitimate bound and
                # must not be reported as "unbounded" in the message.
                min_label = min_value if min_value is not None else '-∞'
                max_label = max_value if max_value is not None else '∞'
                validators_list.append(
                    validators.NumberRange(
                        min=min_value if min_value is not None else -float('inf'),
                        max=max_value if max_value is not None else float('inf'),
                        message=f"Value must be between {min_label} and {max_label}"
                    )
                )

        return validators_list

    def add_dynamic_fields(self, collection_name, config, initial_data=None):
        """Add dynamic fields to the form based on the configuration.

        Args:
            collection_name: Name of the collection; used as field-name prefix.
            config: Mapping of field name to field definition. The keys read
                here are 'name', 'type', 'description', 'default',
                'allowed_values', plus those used by the validators.
            initial_data: Optional mapping of field name to stored value. It
                takes precedence over the definition's 'default'.
        """
        self.dynamic_fields[collection_name] = []
        for field_name, field_def in config.items():
            current_app.logger.debug(f"{field_name}: {field_def}")
            # Prefix the field name with the collection name
            full_field_name = f"{collection_name}_{field_name}"
            label = field_def.get('name', field_name)
            field_type = field_def.get('type')
            description = field_def.get('description', '')
            default = field_def.get('default')

            # Determine standard validators
            field_validators = self._create_field_validators(field_def)

            # Select the field class; tagging_fields and enum need special handling
            if field_type == 'tagging_fields':
                field_class = TextAreaField
                field_validators.append(validate_tagging_fields)
                extra_classes = 'json-editor'
                field_kwargs = {}
            elif field_type == 'enum':
                field_class = SelectField
                allowed_values = field_def.get('allowed_values', [])
                choices = [(str(val), str(val)) for val in allowed_values]
                extra_classes = ''
                field_kwargs = {'choices': choices}
            else:
                extra_classes = ''
                # Unknown types fall back to a plain StringField
                field_class = {
                    'integer': IntegerField,
                    'float': FloatField,
                    'boolean': BooleanField,
                    'string': StringField,
                    'date': DateField,
                }.get(field_type, StringField)
                field_kwargs = {}

            # Prepare field data: stored value first, configured default otherwise
            field_data = None
            if initial_data and field_name in initial_data:
                field_data = initial_data[field_name]
                if field_type == 'tagging_fields' and isinstance(field_data, dict):
                    try:
                        field_data = json.dumps(field_data, indent=2)
                    except (TypeError, ValueError) as e:
                        current_app.logger.error(f"Error converting initial data to JSON: {e}")
                        field_data = "{}"
            elif default is not None:
                field_data = default

            # Create render_kw with classes and any other HTML attributes
            render_kw = {'class': extra_classes} if extra_classes else {}
            if description:
                render_kw['title'] = description  # For tooltip
                render_kw['data-bs-toggle'] = 'tooltip'
                render_kw['data-bs-placement'] = 'right'

            # Create the field
            field_kwargs.update({
                'label': label,
                'description': description,
                'validators': field_validators,
                'default': field_data,
                'render_kw': render_kw
            })

            unbound_field = field_class(**field_kwargs)

            # Bind the field to the form
            bound_field = unbound_field.bind(form=self, name=full_field_name)

            # Process the field with formdata. Browsers omit unchecked
            # checkboxes from the submitted data, so on a submission a
            # BooleanField must be processed from formdata even when its name
            # is absent; otherwise it would be reset to its stored value and
            # could never be switched off.
            form_submitted = bool(self.formdata)
            if form_submitted and (full_field_name in self.formdata or field_class is BooleanField):
                bound_field.process(self.formdata)
            else:
                bound_field.process(formdata=None, data=field_data)  # Use prepared field_data

            # Add the field to the form
            setattr(self, full_field_name, bound_field)
            self._fields[full_field_name] = bound_field
            self.dynamic_fields[collection_name].append(full_field_name)

    def get_static_fields(self):
        """Return a list of static (class-declared) field instances."""
        # Get names of dynamic fields
        dynamic_field_names = set()
        for field_list in self.dynamic_fields.values():
            dynamic_field_names.update(field_list)

        # Return all fields that are not dynamic
        return [field for name, field in self._fields.items() if name not in dynamic_field_names]

    def get_dynamic_fields(self):
        """Return a dictionary mapping each collection name to its field instances."""
        return {
            collection_name: [getattr(self, name) for name in field_names]
            for collection_name, field_names in self.dynamic_fields.items()
        }

    def get_dynamic_data(self, collection_name):
        """Retrieve the data from the dynamic fields of a specific collection.

        Returns a dict keyed by the original (unprefixed) field names; it is
        empty when the collection is unknown. Non-empty TextAreaField values
        are parsed as JSON, falling back to the raw text when parsing fails.
        """
        data = {}
        current_app.logger.debug(f"{collection_name} in {self.dynamic_fields}?")
        if collection_name not in self.dynamic_fields:
            return data

        prefix_length = len(collection_name) + 1  # +1 for the underscore
        for full_field_name in self.dynamic_fields[collection_name]:
            original_field_name = full_field_name[prefix_length:]
            field = getattr(self, full_field_name)
            # Parse JSON for tagging_fields type
            if isinstance(field, TextAreaField) and field.data:
                try:
                    data[original_field_name] = json.loads(field.data)
                except json.JSONDecodeError:
                    # Validation should catch this, but just in case
                    data[original_field_name] = field.data
            else:
                data[original_field_name] = field.data
        return data
def validate_tagging_fields(form, field):
    """Validate the tagging fields structure.

    ``field.data`` must be empty or a JSON object that maps field names to
    definition objects. Each definition needs a 'type' out of string, integer,
    float, date or enum; enum definitions need an 'allowed_values' list, and
    numeric definitions that give both bounds need min_value < max_value.

    Raises:
        ValidationError: with a message describing the first problem found.
    """
    if not field.data:
        return

    # Keep the try blocks narrow. ValidationError is itself a ValueError, so a
    # broad `except ValueError` around the checks below would catch the
    # specific errors and re-wrap them with a misleading prefix.
    try:
        fields_data = json.loads(field.data)
    except json.JSONDecodeError:
        raise ValidationError("Invalid JSON format")
    except (TypeError, ValueError) as e:
        raise ValidationError(f"Invalid field definition: {str(e)}") from e

    # Validate it's a dictionary
    if not isinstance(fields_data, dict):
        raise ValidationError("Tagging fields must be a dictionary")

    # Validate each field definition
    for field_name, field_def in fields_data.items():
        if not isinstance(field_def, dict):
            raise ValidationError(f"Field definition for {field_name} must be a dictionary")

        # Check required properties
        if 'type' not in field_def:
            raise ValidationError(f"Field {field_name} missing required 'type' property")

        # Validate type
        if field_def['type'] not in ['string', 'integer', 'float', 'date', 'enum']:
            raise ValidationError(f"Field {field_name} has invalid type: {field_def['type']}")

        # Validate enum fields have allowed_values
        if field_def['type'] == 'enum':
            if 'allowed_values' not in field_def:
                raise ValidationError(f"Enum field {field_name} missing required 'allowed_values' list")
            if not isinstance(field_def['allowed_values'], list):
                raise ValidationError(f"Field {field_name} allowed_values must be a list")

        # Validate numeric fields
        if field_def['type'] in ['integer', 'float']:
            if 'min_value' in field_def and 'max_value' in field_def:
                try:
                    min_val = float(field_def['min_value'])
                    max_val = float(field_def['max_value'])
                except (TypeError, ValueError) as e:
                    # Bounds that cannot be converted to numbers
                    raise ValidationError(f"Invalid field definition: {str(e)}") from e
                if min_val >= max_val:
                    raise ValidationError(f"Field {field_name} min_value must be less than max_value")

View File

@@ -1,29 +1,22 @@
from datetime import datetime as dt, timezone as tz
from flask import current_app, session
from flask import current_app
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain.globals import set_debug
from sqlalchemy.exc import SQLAlchemyError
from celery import states
from celery.exceptions import Ignore
import os
# OpenAI imports
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.exceptions import LangChainException
from common.utils.database import Database
from common.models.document import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI, Embedding
from common.models.document import Embedding
from common.models.user import Tenant
from common.models.interaction import ChatSession, Interaction, InteractionEmbedding
from common.extensions import db
from common.utils.celery_utils import current_celery
from common.utils.model_utils import select_model_variables, create_language_template, replace_variable_in_template
from common.langchain.eveai_retriever import EveAIRetriever
from common.langchain.eveai_history_retriever import EveAIHistoryRetriever
from common.langchain.retrievers.eveai_default_rag_retriever import EveAIDefaultRagRetriever
from common.langchain.retrievers.eveai_history_retriever import EveAIHistoryRetriever
from common.utils.business_event import BusinessEvent
from common.utils.business_event_context import current_event
@@ -139,7 +132,7 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
new_interaction.detailed_question_at = dt.now(tz.utc)
with current_event.create_span("Generate Answer using RAG"):
retriever = EveAIRetriever(model_variables, tenant_info)
retriever = EveAIDefaultRagRetriever(model_variables, tenant_info)
llm = model_variables['llm']
template = model_variables['rag_template']
language_template = create_language_template(template, language)
@@ -243,7 +236,7 @@ def answer_using_llm(question, language, tenant, chat_session):
new_interaction.detailed_question_at = dt.now(tz.utc)
with current_event.create_span("Detail Answer using LLM"):
retriever = EveAIRetriever(model_variables, tenant_info)
retriever = EveAIDefaultRagRetriever(model_variables, tenant_info)
llm = model_variables['llm_no_rag']
template = model_variables['encyclopedia_template']
language_template = create_language_template(template, language)

View File

@@ -0,0 +1,29 @@
"""Add tuning to Retriever
Revision ID: 331f8100eb87
Revises: 6c5ca750e60c
Create Date: 2024-10-31 07:17:22.579376
"""
from alembic import op
import sqlalchemy as sa
import pgvector
# revision identifiers, used by Alembic.
revision = '331f8100eb87'
down_revision = '6c5ca750e60c'
branch_labels = None
depends_on = None
def upgrade():
    """Add the nullable boolean ``tuning`` column to the ``retriever`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    tuning_column = sa.Column('tuning', sa.Boolean(), nullable=True)
    op.add_column('retriever', tuning_column)
    # ### end Alembic commands ###
def downgrade():
    """Drop the ``tuning`` column from the ``retriever`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(table_name='retriever', column_name='tuning')
    # ### end Alembic commands ###

View File

@@ -0,0 +1,56 @@
"""Add Retriever Model
Revision ID: 3717364e6429
Revises: 7b7b566e667f
Create Date: 2024-10-21 14:22:30.258679
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '3717364e6429'
down_revision = '7b7b566e667f'
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``retriever`` table and drop three columns from ``catalog``.

    The ``retriever`` table references ``catalog.id`` through ``catalog_id``
    and ``public.user.id`` through ``created_by`` / ``updated_by``. Afterwards
    ``es_similarity_threshold``, ``es_k`` and ``rag_tuning`` are removed from
    ``catalog``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    retriever_columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=50), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('catalog_id', sa.Integer(), nullable=True),
        sa.Column('type', sa.String(length=50), nullable=False),
        sa.Column('user_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('system_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('configuration', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
        sa.Column('created_by', sa.Integer(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_by', sa.Integer(), nullable=True),
    ]
    retriever_constraints = [
        sa.ForeignKeyConstraint(['catalog_id'], ['catalog.id'], ),
        sa.ForeignKeyConstraint(['created_by'], ['public.user.id'], ),
        sa.ForeignKeyConstraint(['updated_by'], ['public.user.id'], ),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table('retriever', *retriever_columns, *retriever_constraints)

    # Same drop order as the autogenerated script.
    for obsolete_column in ('es_similarity_threshold', 'es_k', 'rag_tuning'):
        op.drop_column('catalog', obsolete_column)
    # ### end Alembic commands ###
def downgrade():
    """Revert ``upgrade()``: restore the ``catalog`` columns and drop ``retriever``.

    ``rag_tuning``, ``es_k`` and ``es_similarity_threshold`` are re-added to
    ``catalog`` as nullable columns without a default, so existing rows get
    NULL for them; the values removed by ``upgrade()`` are not recovered.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('catalog', sa.Column('rag_tuning', sa.BOOLEAN(), autoincrement=False, nullable=True))
    op.add_column('catalog', sa.Column('es_k', sa.INTEGER(), autoincrement=False, nullable=True))
    op.add_column('catalog', sa.Column('es_similarity_threshold', sa.DOUBLE_PRECISION(precision=53), autoincrement=False, nullable=True))
    # The autogenerated script also dropped two unnamed foreign keys on
    # 'catalog' (drop_constraint(None, ...)) and recreated
    # catalog_created_by_fkey / catalog_updated_by_fkey. upgrade() never
    # touches those constraints, and a DROP CONSTRAINT cannot be emitted
    # without a constraint name, so those statements made the downgrade fail.
    # They are intentionally left out.
    op.drop_table('retriever')
    # ### end Alembic commands ###

View File

@@ -0,0 +1,29 @@
"""Add catalog_properties to DocumentVersion
Revision ID: 6c5ca750e60c
Revises: b64d5cf32c7a
Create Date: 2024-10-29 08:33:54.663211
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '6c5ca750e60c'
down_revision = 'b64d5cf32c7a'
branch_labels = None
depends_on = None
def upgrade():
    """Add the nullable JSONB ``catalog_properties`` column to ``document_version``."""
    # ### commands auto generated by Alembic - please adjust! ###
    catalog_properties = sa.Column(
        'catalog_properties',
        postgresql.JSONB(astext_type=sa.Text()),
        nullable=True,
    )
    op.add_column('document_version', catalog_properties)
    # ### end Alembic commands ###
def downgrade():
    """Drop the ``catalog_properties`` column from ``document_version``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(table_name='document_version', column_name='catalog_properties')
    # ### end Alembic commands ###

View File

@@ -0,0 +1,46 @@
"""Extensions for more Catalog Types in Catalog Model
Revision ID: 7b7b566e667f
Revises: 28984b05d396
Create Date: 2024-10-21 07:39:52.260054
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '7b7b566e667f'
down_revision = '28984b05d396'
branch_labels = None
depends_on = None
def upgrade():
    """Extend ``catalog`` with a parent link, a type and three JSONB columns.

    Adds ``parent_id`` (self-referencing foreign key to ``catalog.id``), a
    non-nullable ``type`` that is set to ``'DEFAULT'`` for existing rows, and
    the nullable JSONB columns ``user_metadata``, ``system_metadata`` and
    ``configuration``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('catalog', sa.Column('parent_id', sa.Integer(), nullable=True))
    # The temporary server default lets the NOT NULL column be added to a
    # table that already contains rows.
    op.add_column('catalog', sa.Column('type', sa.String(length=50), nullable=False, server_default='DEFAULT'))

    # Update all existing rows to have the 'type' set to 'DEFAULT'
    op.execute("UPDATE catalog SET type='DEFAULT' WHERE type IS NULL")

    # Remove the server default (optional but recommended)
    op.alter_column('catalog', 'type', server_default=None)

    op.add_column('catalog', sa.Column('user_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
    op.add_column('catalog', sa.Column('system_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
    op.add_column('catalog', sa.Column('configuration', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
    # Name the constraint explicitly: revision b64d5cf32c7a drops it as
    # 'catalog_parent_id_fkey', so the name must not depend on how the
    # backend names an anonymous foreign key.
    op.create_foreign_key('catalog_parent_id_fkey', 'catalog', 'catalog', ['parent_id'], ['id'])
    # ### end Alembic commands ###
def downgrade():
    """Revert ``upgrade()``: drop the parent foreign key and the added columns."""
    # ### commands auto generated by Alembic - please adjust! ###
    # The autogenerated call passed None as the constraint name, which cannot
    # be turned into a DROP CONSTRAINT statement. 'catalog_parent_id_fkey' is
    # the name under which revision b64d5cf32c7a drops this same foreign key
    # (PostgreSQL's default <table>_<column>_fkey pattern for the unnamed
    # constraint created in upgrade()).
    op.drop_constraint('catalog_parent_id_fkey', 'catalog', type_='foreignkey')
    op.drop_column('catalog', 'configuration')
    op.drop_column('catalog', 'system_metadata')
    op.drop_column('catalog', 'user_metadata')
    op.drop_column('catalog', 'type')
    op.drop_column('catalog', 'parent_id')
    # ### end Alembic commands ###

View File

@@ -0,0 +1,33 @@
"""Removing Parent Catalog ID from Catalog ==> use tags in user_metadata instead
Revision ID: b64d5cf32c7a
Revises: 3717364e6429
Create Date: 2024-10-28 07:48:04.624298
"""
from alembic import op
import sqlalchemy as sa
import pgvector
# revision identifiers, used by Alembic.
revision = 'b64d5cf32c7a'
down_revision = '3717364e6429'
branch_labels = None
depends_on = None
def upgrade():
    """Drop ``catalog.parent_id`` with its foreign key, plus one ``chat_session`` foreign key."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_constraint(
        constraint_name='catalog_parent_id_fkey',
        table_name='catalog',
        type_='foreignkey',
    )
    op.drop_column(table_name='catalog', column_name='parent_id')
    # NOTE(review): this drop is not mentioned in the revision message and
    # downgrade() does not recreate the constraint - confirm it is intended.
    op.drop_constraint(
        constraint_name='chat_session_user_id_fkey',
        table_name='chat_session',
        type_='foreignkey',
    )
    # ### end Alembic commands ###
def downgrade():
    """Re-add ``catalog.parent_id`` and its self-referencing foreign key.

    NOTE(review): ``upgrade()`` also drops ``chat_session_user_id_fkey``; that
    constraint is not recreated here - confirm whether it should be.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    parent_id_column = sa.Column('parent_id', sa.INTEGER(), autoincrement=False, nullable=True)
    op.add_column('catalog', parent_id_column)
    op.create_foreign_key(
        'catalog_parent_id_fkey',
        source_table='catalog',
        referent_table='catalog',
        local_cols=['parent_id'],
        remote_cols=['id'],
    )
    # ### end Alembic commands ###

View File

@@ -48,6 +48,9 @@ http {
#access_log logs/host.access.log main;
location = / {
return 301 /admin/;
}
location / {
root /etc/nginx/public;
index index.html index.htm;

View File

@@ -82,3 +82,4 @@ prometheus-client~=0.20.0
flower~=2.0.1
psutil~=6.0.0
celery-redbeat~=2.2.0
WTForms-SQLAlchemy~=0.4.1