9 Commits

Author SHA1 Message Date
Josako
5e77b478dd - Release script added to tag in both git and docker 2024-10-17 11:22:18 +02:00
Josako
6f71259822 - Changelog update 2024-10-17 10:35:51 +02:00
Josako
74cc7ae95e - Adapt Sync Wordpress Component to Catalog introduction
- Small bug fixes
2024-10-17 10:31:13 +02:00
Josako
7f12c8b355 - Remove obsolete fields from Tenant model (Catalog introduction) 2024-10-16 13:59:57 +02:00
Josako
6069f5f7e5 - Catalog functionality integrated into document and document_version views
- small bugfixes and improvements
2024-10-16 13:09:19 +02:00
Josako
3e644f1652 - Add Catalog Functionality 2024-10-15 18:14:57 +02:00
Josako
3316a8bc47 - Small changes to show when upgrades are finished 2024-10-14 16:40:56 +02:00
Josako
270479c77d - Add Catalog Concept to Document Domain
- Create Catalog views
- Modify document stack creation
2024-10-14 13:56:23 +02:00
Josako
0f4558d775 - Small fix in interaction view, as it still referred to file_name 2024-10-11 18:14:35 +02:00
42 changed files with 1180 additions and 372 deletions

View File

@@ -25,6 +25,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Security
- In case of vulnerabilities.
## [1.0.13-alfa]
### Added
- Finished Catalog introduction
- Reinitialization of WordPress site for syncing
### Changed
- Modification of WordPress Sync Component
- Cleanup of attributes in Tenant
### Fixed
- Overall bugfixes as a result of the Catalog introduction
## [1.0.12-alfa]
### Added
- Added Catalog functionality
### Changed
- For changes in existing functionality.
### Deprecated
- For soon-to-be removed features.
### Removed
- For now removed features.
### Fixed
- Set default language when registering Documents or URLs.
### Security
- In case of vulnerabilities.
## [1.0.11-alfa]
### Added
@@ -37,7 +70,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- repopack can now split for different components
### Fixed
- Various fixes as consequece of changing file_location / file_name ==> bucket_name / object_name
- Various fixes as consequence of changing file_location / file_name ==> bucket_name / object_name
- Celery Routing / Queuing updated
## [1.0.10-alfa]

View File

@@ -38,7 +38,7 @@ class EveAIRetriever(BaseRetriever, BaseModel):
similarity_threshold = self.model_variables['similarity_threshold']
k = self.model_variables['k']
if self.tenant_info['rag_tuning']:
if self.model_variables['rag_tuning']:
try:
current_date = get_date_in_timezone(self.tenant_info['timezone'])
current_app.rag_tuning_logger.debug(f'Current date: {current_date}\n')
@@ -73,7 +73,7 @@ class EveAIRetriever(BaseRetriever, BaseModel):
current_app.logger.error(f'Error generating overview: {e}')
db.session.rollback()
if self.tenant_info['rag_tuning']:
if self.model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Parameters for Retrieval of documents: \n')
current_app.rag_tuning_logger.debug(f'Similarity Threshold: {similarity_threshold}\n')
current_app.rag_tuning_logger.debug(f'K: {k}\n')
@@ -106,14 +106,14 @@ class EveAIRetriever(BaseRetriever, BaseModel):
.limit(k)
)
if self.tenant_info['rag_tuning']:
if self.model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Query executed for Retrieval of documents: \n')
current_app.rag_tuning_logger.debug(f'{query_obj.statement}\n')
current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
res = query_obj.all()
if self.tenant_info['rag_tuning']:
if self.model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents \n')
current_app.rag_tuning_logger.debug(f'Data retrieved: \n')
current_app.rag_tuning_logger.debug(f'{res}\n')
@@ -121,7 +121,7 @@ class EveAIRetriever(BaseRetriever, BaseModel):
result = []
for doc in res:
if self.tenant_info['rag_tuning']:
if self.model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Document ID: {doc[0].id} - Distance: {doc[1]}\n')
current_app.rag_tuning_logger.debug(f'Chunk: \n {doc[0].chunk}\n\n')
result.append(f'SOURCE: {doc[0].id}\n\n{doc[0].chunk}\n\n')

View File

@@ -2,12 +2,49 @@ from common.extensions import db
from .user import User, Tenant
from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.postgresql import ARRAY
import sqlalchemy as sa
class Catalog(db.Model):
# SQLAlchemy model for a catalog. Document rows reference a catalog through
# their catalog_id foreign key (Document.catalog_id -> Catalog.id).
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(50), nullable=False)
description = db.Column(db.Text, nullable=True)
# Embedding variables: HTML chunking settings (tags, end tags, included/excluded
# elements, excluded classes) stored as PostgreSQL string arrays.
html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)
# Chunk size bounds (defaults 2000 / 3000); unit is not stated here -- presumably characters, TODO confirm.
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
# Embedding search variables ==> move to specialist?
es_k = db.Column(db.Integer, nullable=True, default=8)
es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.4)
# Chat variables ==> Move to Specialist?
chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
# Tuning enablers (both default to False)
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
rag_tuning = db.Column(db.Boolean, nullable=True, default=False) # Move to Specialist?
# Versioning Information: timestamps are filled by the database (server_default),
# updated_at is additionally refreshed on every UPDATE via onupdate.
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Document(db.Model):
id = db.Column(db.Integer, primary_key=True)
# tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
catalog_id = db.Column(db.Integer, db.ForeignKey(Catalog.id), nullable=True)
name = db.Column(db.String(100), nullable=False)
tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
valid_from = db.Column(db.DateTime, nullable=True)
valid_to = db.Column(db.DateTime, nullable=True)

View File

@@ -34,32 +34,32 @@ class Tenant(db.Model):
embedding_model = db.Column(db.String(50), nullable=True)
llm_model = db.Column(db.String(50), nullable=True)
# Embedding variables
html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
# Embedding search variables
es_k = db.Column(db.Integer, nullable=True, default=5)
es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7)
# Chat variables
chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
# # Embedding variables ==> To be removed once all migrations (dev + prod) have been done
# html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
# html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
# html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
# html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
# html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)
#
# min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
# max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
#
# # Embedding search variables
# es_k = db.Column(db.Integer, nullable=True, default=5)
# es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7)
#
# # Chat variables
# chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
# chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
fallback_algorithms = db.Column(ARRAY(sa.String(50)), nullable=True)
# Licensing Information
encrypted_chat_api_key = db.Column(db.String(500), nullable=True)
encrypted_api_key = db.Column(db.String(500), nullable=True)
# Tuning enablers
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
rag_tuning = db.Column(db.Boolean, nullable=True, default=False)
# # Tuning enablers
# embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
# rag_tuning = db.Column(db.Boolean, nullable=True, default=False)
# Entitlements
currency = db.Column(db.String(20), nullable=True)
@@ -96,20 +96,7 @@ class Tenant(db.Model):
'allowed_languages': self.allowed_languages,
'embedding_model': self.embedding_model,
'llm_model': self.llm_model,
'html_tags': self.html_tags,
'html_end_tags': self.html_end_tags,
'html_included_elements': self.html_included_elements,
'html_excluded_elements': self.html_excluded_elements,
'html_excluded_classes': self.html_excluded_classes,
'min_chunk_size': self.min_chunk_size,
'max_chunk_size': self.max_chunk_size,
'es_k': self.es_k,
'es_similarity_threshold': self.es_similarity_threshold,
'chat_RAG_temperature': self.chat_RAG_temperature,
'chat_no_RAG_temperature': self.chat_no_RAG_temperature,
'fallback_algorithms': self.fallback_algorithms,
'embed_tuning': self.embed_tuning,
'rag_tuning': self.rag_tuning,
'currency': self.currency,
'usage_email': self.usage_email,
}

View File

@@ -17,11 +17,12 @@ from ..models.user import Tenant
def create_document_stack(api_input, file, filename, extension, tenant_id):
# Create the Document
new_doc = create_document(api_input, filename, tenant_id)
catalog_id = int(api_input.get('catalog_id'))
new_doc = create_document(api_input, filename, catalog_id)
db.session.add(new_doc)
# Create the DocumentVersion
new_doc_vers = create_version_for_document(new_doc,
new_doc_vers = create_version_for_document(new_doc, tenant_id,
api_input.get('url', ''),
api_input.get('language', 'en'),
api_input.get('user_context', ''),
@@ -45,7 +46,7 @@ def create_document_stack(api_input, file, filename, extension, tenant_id):
return new_doc, new_doc_vers
def create_document(form, filename, tenant_id):
def create_document(form, filename, catalog_id):
new_doc = Document()
if form['name'] == '':
new_doc.name = filename.rsplit('.', 1)[0]
@@ -56,13 +57,13 @@ def create_document(form, filename, tenant_id):
new_doc.valid_from = form['valid_from']
else:
new_doc.valid_from = dt.now(tz.utc)
new_doc.tenant_id = tenant_id
new_doc.catalog_id = catalog_id
set_logging_information(new_doc, dt.now(tz.utc))
return new_doc
def create_version_for_document(document, url, language, user_context, user_metadata):
def create_version_for_document(document, tenant_id, url, language, user_context, user_metadata):
new_doc_vers = DocumentVersion()
if url != '':
new_doc_vers.url = url
@@ -82,7 +83,7 @@ def create_version_for_document(document, url, language, user_context, user_meta
set_logging_information(new_doc_vers, dt.now(tz.utc))
mark_tenant_storage_dirty(document.tenant_id)
mark_tenant_storage_dirty(tenant_id)
return new_doc_vers
@@ -286,7 +287,7 @@ def edit_document_version(version_id, user_context):
return None, str(e)
def refresh_document_with_info(doc_id, api_input):
def refresh_document_with_info(doc_id, tenant_id, api_input):
doc = Document.query.get_or_404(doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
@@ -294,11 +295,11 @@ def refresh_document_with_info(doc_id, api_input):
return None, "This document has no URL. Only documents with a URL can be refreshed."
new_doc_vers = create_version_for_document(
doc,
doc, tenant_id,
old_doc_vers.url,
api_input.get('language', old_doc_vers.language),
api_input.get('user_context', old_doc_vers.user_context),
api_input.get('user_metadata', old_doc_vers.user_metadata)
api_input.get('user_metadata', old_doc_vers.user_metadata),
)
set_logging_information(new_doc_vers, dt.now(tz.utc))
@@ -318,9 +319,9 @@ def refresh_document_with_info(doc_id, api_input):
response.raise_for_status()
file_content = response.content
upload_file_for_version(new_doc_vers, file_content, extension, doc.tenant_id)
upload_file_for_version(new_doc_vers, file_content, extension, tenant_id)
task = current_celery.send_task('create_embeddings', args=[doc.tenant_id, new_doc_vers.id,], queue='embeddings')
task = current_celery.send_task('create_embeddings', args=[tenant_id, new_doc_vers.id,], queue='embeddings')
current_app.logger.info(f'Embedding creation started for document {doc_id} on version {new_doc_vers.id} '
f'with task id: {task.id}.')
@@ -328,7 +329,7 @@ def refresh_document_with_info(doc_id, api_input):
# Update the existing refresh_document function to use the new refresh_document_with_info
def refresh_document(doc_id):
def refresh_document(doc_id, tenant_id):
current_app.logger.info(f'Refreshing document {doc_id}')
doc = Document.query.get_or_404(doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
@@ -339,11 +340,11 @@ def refresh_document(doc_id):
'user_metadata': old_doc_vers.user_metadata
}
return refresh_document_with_info(doc_id, api_input)
return refresh_document_with_info(doc_id, tenant_id, api_input)
# Function triggered when a document_version is created or updated
def mark_tenant_storage_dirty(tenant_id):
tenant = db.session.query(Tenant).filter_by(id=tenant_id).first()
tenant = db.session.query(Tenant).filter_by(id=int(tenant_id)).first()
tenant.storage_dirty = True
db.session.commit()

View File

@@ -14,7 +14,7 @@ from portkey_ai.langchain.portkey_langchain_callback_handler import LangchainCal
from common.langchain.llm_metrics_handler import LLMMetricsHandler
from common.langchain.tracked_openai_embeddings import TrackedOpenAIEmbeddings
from common.langchain.tracked_transcribe import tracked_transcribe
from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI
from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI, Catalog
from common.models.user import Tenant
from config.model_config import MODEL_CONFIG
from common.utils.business_event_context import current_event
@@ -42,8 +42,9 @@ def set_language_prompt_template(cls, language_prompt):
class ModelVariables(MutableMapping):
def __init__(self, tenant: Tenant):
def __init__(self, tenant: Tenant, catalog_id=None):
self.tenant = tenant
self.catalog_id = catalog_id
self._variables = self._initialize_variables()
self._embedding_model = None
self._llm = None
@@ -57,25 +58,32 @@ class ModelVariables(MutableMapping):
def _initialize_variables(self):
variables = {}
# We initialize the variables that are available knowing the tenant. For the other, we will apply 'lazy loading'
variables['k'] = self.tenant.es_k or 5
variables['similarity_threshold'] = self.tenant.es_similarity_threshold or 0.7
variables['RAG_temperature'] = self.tenant.chat_RAG_temperature or 0.3
variables['no_RAG_temperature'] = self.tenant.chat_no_RAG_temperature or 0.5
variables['embed_tuning'] = self.tenant.embed_tuning or False
variables['rag_tuning'] = self.tenant.rag_tuning or False
# Get the Catalog if catalog_id is passed
if self.catalog_id:
catalog = Catalog.query.get_or_404(self.catalog_id)
# We initialize the variables that are available knowing the tenant.
variables['embed_tuning'] = catalog.embed_tuning or False
# Set HTML Chunking Variables
variables['html_tags'] = catalog.html_tags
variables['html_end_tags'] = catalog.html_end_tags
variables['html_included_elements'] = catalog.html_included_elements
variables['html_excluded_elements'] = catalog.html_excluded_elements
variables['html_excluded_classes'] = catalog.html_excluded_classes
# Set Chunk Size variables
variables['min_chunk_size'] = catalog.min_chunk_size
variables['max_chunk_size'] = catalog.max_chunk_size
# Set the RAG Context (will have to change once specialists are defined)
variables['rag_context'] = self.tenant.rag_context or " "
# Set HTML Chunking Variables
variables['html_tags'] = self.tenant.html_tags
variables['html_end_tags'] = self.tenant.html_end_tags
variables['html_included_elements'] = self.tenant.html_included_elements
variables['html_excluded_elements'] = self.tenant.html_excluded_elements
variables['html_excluded_classes'] = self.tenant.html_excluded_classes
# Set Chunk Size variables
variables['min_chunk_size'] = self.tenant.min_chunk_size
variables['max_chunk_size'] = self.tenant.max_chunk_size
# Temporary setting until we have Specialists
variables['rag_tuning'] = False
variables['RAG_temperature'] = 0.3
variables['no_RAG_temperature'] = 0.5
variables['k'] = 8
variables['similarity_threshold'] = 0.4
# Set model providers
variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1)
@@ -195,7 +203,12 @@ class ModelVariables(MutableMapping):
return self.transcription_client
elif key in self._variables.get('prompt_templates', []):
return self.get_prompt_template(key)
return self._variables.get(key)
else:
value = self._variables.get(key)
if value is not None:
return value
else:
raise KeyError(f'Variable {key} does not exist in ModelVariables')
def __setitem__(self, key: str, value: Any) -> None:
self._variables[key] = value
@@ -225,8 +238,8 @@ class ModelVariables(MutableMapping):
return self._variables.values()
def select_model_variables(tenant):
model_variables = ModelVariables(tenant=tenant)
def select_model_variables(tenant, catalog_id=None):
model_variables = ModelVariables(tenant=tenant, catalog_id=catalog_id)
return model_variables

View File

@@ -37,7 +37,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_app.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -45,7 +45,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_workers.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -53,7 +53,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_chat.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -61,7 +61,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_chat_workers.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -69,7 +69,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_api.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -77,7 +77,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_beat.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -85,7 +85,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_entitlements.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -93,7 +93,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/sqlalchemy.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -101,7 +101,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/mailman.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -109,7 +109,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/security.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -117,7 +117,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/rag_tuning.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -125,7 +125,7 @@ LOGGING = {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/embed_tuning.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
@@ -133,7 +133,7 @@ LOGGING = {
'level': 'INFO',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/business_events.log',
'maxBytes': 1024 * 1024 * 5, # 5MB
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},

View File

@@ -0,0 +1,60 @@
#!/bin/bash
# Release helper.
#
# Tags the locally available ':latest' Docker image of every compose service
# with the release version, pushes those tags, and then creates and pushes a
# matching annotated Git tag.
#
# Usage: <this script> -r <release_version> -m <release_message>

# Initialize variables
RELEASE_VERSION=""
RELEASE_MESSAGE=""
DOCKER_ACCOUNT="josakola" # Your Docker account name

# Parse input arguments
while getopts r:m: flag
do
    case "${flag}" in
        r) RELEASE_VERSION=${OPTARG};;
        m) RELEASE_MESSAGE=${OPTARG};;
        *)
            echo "Usage: $0 -r <release_version> -m <release_message>"
            exit 1 ;;
    esac
done

# Ensure both version and message are provided
if [ -z "$RELEASE_VERSION" ]; then
    echo "Error: Release version not provided. Use -r <release_version>"
    exit 1
fi

if [ -z "$RELEASE_MESSAGE" ]; then
    echo "Error: Release message not provided. Use -m <release_message>"
    exit 1
fi

# Path to your docker-compose file
DOCKER_COMPOSE_FILE="compose_dev.yaml"

# Get all the services defined in the docker-compose file
SERVICES=$(docker-compose -f "$DOCKER_COMPOSE_FILE" config --services)

# Step 1: Tag and push images for all services that belong to your Docker account
for SERVICE in $SERVICES; do
    # The image name was previously built from the placeholder
    # "your-docker-repo/", which can never start with $DOCKER_ACCOUNT, so every
    # service was skipped and nothing was released.
    # NOTE(review): assumes images are named <account>/<service> -- confirm
    # against the image names used in the compose file.
    DOCKER_IMAGE="$DOCKER_ACCOUNT/$SERVICE"

    # Only services with a local ':latest' image under the account are released;
    # third-party services (no such image) are skipped.
    if docker image inspect "$DOCKER_IMAGE:latest" > /dev/null 2>&1; then
        echo "Tagging Docker image for service: $SERVICE with version: $RELEASE_VERSION"

        # Tag the 'latest' image with the new release version
        docker tag "$DOCKER_IMAGE:latest" "$DOCKER_IMAGE:$RELEASE_VERSION"

        # Push the newly tagged image to Docker Hub
        docker push "$DOCKER_IMAGE:$RELEASE_VERSION"
    else
        echo "Skipping service: $SERVICE (not part of Docker account $DOCKER_ACCOUNT)"
    fi
done

# Step 2: Tag the Git repository with the release version
echo "Tagging Git repository with version: $RELEASE_VERSION"
git tag -a "v$RELEASE_VERSION" -m "Release $RELEASE_VERSION: $RELEASE_MESSAGE"
git push origin "v$RELEASE_VERSION"

echo "Release process completed for version: $RELEASE_VERSION"

View File

@@ -56,13 +56,6 @@ def create_app(config_file=None):
app.logger.debug(f'Request URL: {request.url}')
app.logger.debug(f'Request headers: {dict(request.headers)}')
# Log request arguments
app.logger.debug(f'Request args: {request.args}')
# Log form data if it's a POST request
if request.method == 'POST':
app.logger.debug(f'Form data: {request.form}')
# Log JSON data if the content type is application/json
if request.is_json:
app.logger.debug(f'JSON data: {request.json}')
@@ -95,6 +88,10 @@ def create_app(config_file=None):
# Don't raise the exception here, let the request continue
# The appropriate error handling will be done in the specific endpoints
@app.route('/api/v1')
def swagger():
return api_rest.render_doc()
return app

View File

@@ -33,6 +33,7 @@ document_ns = Namespace('documents', description='Document related operations')
# Define models for request parsing and response serialization
upload_parser = reqparse.RequestParser()
upload_parser.add_argument('catalog_id', location='form', type=int, required=True, help='The catalog to add the file to')
upload_parser.add_argument('file', location='files', type=FileStorage, required=True, help='The file to upload')
upload_parser.add_argument('name', location='form', type=str, required=False, help='Name of the document')
upload_parser.add_argument('language', location='form', type=str, required=True, help='Language of the document')
@@ -75,6 +76,7 @@ class AddDocument(Resource):
validate_file_type(extension)
api_input = {
'catalog_id': args.get('catalog_id'),
'name': args.get('name') or filename,
'language': args.get('language'),
'user_context': args.get('user_context'),
@@ -102,6 +104,7 @@ class AddDocument(Resource):
# Models for AddURL
add_url_model = document_ns.model('AddURL', {
'catalog_id': fields.Integer(required='True', description='ID of the catalog the URL needs to be added to'),
'url': fields.String(required=True, description='URL of the document to add'),
'name': fields.String(required=False, description='Name of the document'),
'language': fields.String(required=True, description='Language of the document'),
@@ -138,6 +141,7 @@ class AddURL(Resource):
file_content, filename, extension = process_url(args['url'], tenant_id)
api_input = {
'catalog_id': args['catalog_id'],
'url': args['url'],
'name': args.get('name') or filename,
'language': args['language'],
@@ -213,7 +217,8 @@ class DocumentResource(Resource):
@document_ns.response(200, 'Document refreshed successfully')
def post(self, document_id):
"""Refresh a document"""
new_version, result = refresh_document(document_id)
tenant_id = get_jwt_identity()
new_version, result = refresh_document(document_id, tenant_id)
if new_version:
return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200
else:

View File

@@ -0,0 +1,23 @@
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{% block title %}Catalog Registration{% endblock %}
{% block content_title %}Register Catalog{% endblock %}
{% block content_description %}Define a new catalog of documents in Evie's Library{% endblock %}
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{% for field in form %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<button type="submit" class="btn btn-primary">Register Catalog</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -0,0 +1,24 @@
{% extends 'base.html' %}
{% from 'macros.html' import render_selectable_table, render_pagination %}
{% block title %}Catalogs{% endblock %}
{% block content_title %}Catalogs{% endblock %}
{% block content_description %}View Catalogs for Tenant{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %}
<div class="container">
<form method="POST" action="{{ url_for('document_bp.handle_catalog_selection') }}">
{{ render_selectable_table(headers=["Catalog ID", "Name"], rows=rows, selectable=True, id="catalogsTable") }}
<div class="form-group mt-3">
<button type="submit" name="action" value="set_session_catalog" class="btn btn-primary">Set Session Catalog</button>
<button type="submit" name="action" value="edit_catalog" class="btn btn-primary">Edit Catalog</button>
</div>
</form>
</div>
{% endblock %}
{% block content_footer %}
{{ render_pagination(pagination, 'document_bp.catalogs') }}
{% endblock %}

View File

@@ -23,15 +23,23 @@
{{ render_collapsible_section('Filter', 'Filter Options', filter_form) }}
<!-- Document Versions Table -->
{{ render_selectable_sortable_table(
headers=["ID", "File Type", "Processing", "Processing Start", "Processing Finish", "Processing Error"],
rows=rows,
selectable=True,
id="documentVersionsTable",
sort_by=sort_by,
sort_order=sort_order
) }}
<div class="form-group mt-3">
<form method="POST" action="{{ url_for('document_bp.handle_document_version_selection') }}">
<!-- Document Versions Table -->
{{ render_selectable_sortable_table(
headers=["ID", "File Type", "Processing", "Processing Start", "Processing Finish", "Processing Error"],
rows=rows,
selectable=True,
id="documentVersionsTable",
sort_by=sort_by,
sort_order=sort_order
) }}
<div class="form-group mt-4">
<button type="submit" name="action" value="edit_document_version" class="btn btn-primary">Edit Document Version</button>
<button type="submit" name="action" value="process_document_version" class="btn btn-danger">Process Document Version</button>
</div>
</form>
</div>
{% endblock %}
{% block content_footer %}

View File

@@ -1,5 +1,5 @@
{% extends 'base.html' %}
{% from 'macros.html' import render_selectable_table, render_pagination %}
{% from 'macros.html' import render_selectable_table, render_pagination, render_filter_field, render_date_filter_field, render_collapsible_section, render_selectable_sortable_table_with_dict_headers %}
{% block title %}Documents{% endblock %}
@@ -8,18 +8,88 @@
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %}
<div class="container">
<form method="POST" action="{{ url_for('document_bp.handle_document_selection') }}">
{{ render_selectable_table(headers=["Document ID", "Name", "Valid From", "Valid To"], rows=rows, selectable=True, id="documentsTable") }}
<div class="form-group mt-3">
<button type="submit" name="action" value="edit_document" class="btn btn-primary">Edit Document</button>
<button type="submit" name="action" value="document_versions" class="btn btn-secondary">Show Document Versions</button>
<button type="submit" name="action" value="refresh_document" class="btn btn-secondary">Refresh Document (new version)</button>
</div>
</form>
</div>
<!-- Filter Form -->
{% set filter_form %}
<form method="GET" action="{{ url_for('document_bp.documents') }}">
{{ render_filter_field('catalog_id', 'Catalog', filter_options['catalog_id'], filters.get('catalog_id', [])) }}
{{ render_filter_field('validity', 'Validity', filter_options['validity'], filters.get('validity', [])) }}
<button type="submit" class="btn btn-primary">Apply Filters</button>
</form>
{% endset %}
{{ render_collapsible_section('Filter', 'Filter Options', filter_form) }}
<div class="form-group mt-3">
<form method="POST" action="{{ url_for('document_bp.handle_document_selection') }}">
<!-- Documents Table -->
{{ render_selectable_sortable_table_with_dict_headers(
headers=[
{"text": "ID", "sort": "id"},
{"text": "Name", "sort": "name"},
{"text": "Catalog", "sort": "catalog_name"},
{"text": "Valid From", "sort": "valid_from"},
{"text": "Valid To", "sort": "valid_to"}
],
rows=rows,
selectable=True,
id="documentsTable",
sort_by=sort_by,
sort_order=sort_order
) }}
<div class="form-group mt-4">
<button type="submit" name="action" value="edit_document" class="btn btn-primary">Edit Document</button>
<button type="submit" name="action" value="document_versions" class="btn btn-secondary">Show Document Versions</button>
<button type="submit" name="action" value="refresh_document" class="btn btn-secondary">Refresh Document (new version)</button>
</div>
</form>
</div>
{% endblock %}
{% block content_footer %}
{{ render_pagination(pagination, 'document_bp.documents') }}
{% endblock %}
{% block scripts %}
<script>
// Wires up the documents table once the DOM is ready:
//  - clicking a sortable header reloads the page with updated
//    sort_by / sort_order query parameters;
//  - selecting a row's radio button logs the selected document ID.
document.addEventListener('DOMContentLoaded', function() {
    const table = document.getElementById('documentsTable');
    if (!table) {
        // Nothing to wire up when the table was not rendered.
        return;
    }
    const headers = table.querySelectorAll('th.sortable');

    headers.forEach(header => {
        header.addEventListener('click', function() {
            const sortBy = this.dataset.sort;
            // Cycle: unsorted -> asc -> desc -> none, based on the icon currently shown.
            let sortOrder = 'asc';
            if (this.querySelector('.fa-sort-up')) {
                sortOrder = 'desc';
            } else if (this.querySelector('.fa-sort-down')) {
                sortOrder = 'none';
            }
            // Build the complete target URL first and navigate once. Assigning
            // window.location.href twice derives the second URL from the
            // still-unchanged current URL, which drops the sort_by parameter.
            let targetUrl = updateQueryStringParameter(window.location.href, 'sort_by', sortBy);
            targetUrl = updateQueryStringParameter(targetUrl, 'sort_order', sortOrder);
            window.location.href = targetUrl;
        });
    });

    // Returns `uri` with query parameter `key` set to `value`, replacing an
    // existing occurrence or appending the parameter when it is absent.
    function updateQueryStringParameter(uri, key, value) {
        var re = new RegExp("([?&])" + key + "=.*?(&|$)", "i");
        var separator = uri.indexOf('?') !== -1 ? "&" : "?";
        if (uri.match(re)) {
            return uri.replace(re, '$1' + key + "=" + value + '$2');
        }
        else {
            return uri + separator + key + "=" + value;
        }
    }

    table.addEventListener('change', function(event) {
        if (event.target.type === 'radio') {
            var selectedRow = event.target.closest('tr');
            // cells[1] is read as the document ID (cells[0] presumably holds the radio button).
            var documentId = selectedRow.cells[1].textContent;
            console.log('Selected Document ID:', documentId);
        }
    });
});
</script>
{% endblock %}

View File

@@ -0,0 +1,25 @@
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{% block title %}Edit Catalog{% endblock %}
{% block content_title %}Edit Catalog{% endblock %}
{% block content_description %}Edit a catalog of documents in Evie's Library.
When you change chunking or embedding information, you'll need to manually refresh the library if you want immediate impact.
{% endblock %}
{% block content %}
{# Edit form for an existing catalog: renders every field of the form passed in by the view. #}
<form method="post">
    {{ form.hidden_tag() }}
    {% set disabled_fields = [] %}
    {% set exclude_fields = [] %}
    {% for field in form %}
        {{ render_field(field, disabled_fields, exclude_fields) }}
    {% endfor %}
    {# This page updates an existing catalog, so the button must not say "Register". #}
    <button type="submit" class="btn btn-primary">Update Catalog</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -8,11 +8,17 @@
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{% for field in form %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{{ render_field(form.name, disabled_fields, exclude_fields) }}
{{ render_field(form.valid_from, disabled_fields, exclude_fields) }}
{{ render_field(form.valid_to, disabled_fields, exclude_fields) }}
<div class="form-group">
<label for="catalog_name">Catalog</label>
<input type="text" class="form-control" id="catalog_name" value="{{ catalog_name }}" readonly>
</div>
<button type="submit" class="btn btn-primary">Update Document</button>
</form>
{% endblock %}
{% endblock %}

View File

@@ -1,5 +1,5 @@
<header class="header-2">
<div class="page-header min-vh-25" style="background-image: url({{url_for('static', filename='/assets/img/EveAI_bg.jpg')}})" loading="lazy">
<div class="page-header min-vh-25" style="background-image: url({{url_for('static', filename='/assets/img/EveAI_bg.jpg')}}); background-position: top left; background-repeat: no-repeat; background-size: cover;" loading="lazy">
<span class="mask bg-gradient-primary opacity-4"></span>
<div class="container">
<div class="row">
@@ -10,4 +10,4 @@
</div>
</div>
</div>
</header>
</header>

View File

@@ -54,7 +54,7 @@
{% if embedding.url %}
<a href="{{ embedding.url }}" target="_blank">{{ embedding.url }}</a>
{% else %}
{{ embedding.file_name }}
{{ embedding.object_name }}
{% endif %}
</li>
{% endfor %}

View File

@@ -177,6 +177,48 @@
</div>
{% endmacro %}
{#
  Renders a table whose column headers are clickable sort handles.

  headers    : list of dicts with keys 'text' (label) and 'sort' (sort key placed in data-sort)
  rows       : list of rows; each row is a list of cells exposing a `value` attribute
  selectable : when truthy, a leading radio-button column is rendered; the radio value is
               the value of the row's first cell
  id         : DOM id assigned to the <table>
  sort_by    : sort key of the currently sorted column
  sort_order : 'asc' or 'desc'; any other value shows the neutral sort icon
#}
{% macro render_selectable_sortable_table_with_dict_headers(headers, rows, selectable, id, sort_by, sort_order) %}
<div class="card">
    <div class="table-responsive">
        <table class="table align-items-center mb-0" id="{{ id }}">
            <thead>
                <tr>
                    {% if selectable %}
                    <th class="text-uppercase text-secondary text-xxs font-weight-bolder opacity-7">Select</th>
                    {% endif %}
                    {% for header in headers %}
                    <th class="text-uppercase text-secondary text-xxs font-weight-bolder opacity-7 sortable" data-sort="{{ header['sort'] }}">
                        {{ header['text'] }}
                        {# Always render exactly one icon: previously the active column showed none
                           when sort_order was neither 'asc' nor 'desc' (e.g. 'none'). #}
                        {% if sort_by == header['sort'] and sort_order == 'asc' %}
                        <i class="fas fa-sort-up"></i>
                        {% elif sort_by == header['sort'] and sort_order == 'desc' %}
                        <i class="fas fa-sort-down"></i>
                        {% else %}
                        <i class="fas fa-sort"></i>
                        {% endif %}
                    </th>
                    {% endfor %}
                </tr>
            </thead>
            <tbody>
                {% for row in rows %}
                <tr>
                    {% if selectable %}
                    <td><input type="radio" name="selected_row" value="{{ row[0].value }}"></td>
                    {% endif %}
                    {% for cell in row %}
                    <td>{{ cell.value }}</td>
                    {% endfor %}
                </tr>
                {% endfor %}
            </tbody>
        </table>
    </div>
</div>
{% endmacro %}
{% macro render_accordion(accordion_id, accordion_items, header_title, header_description) %}
<div class="accordion-1">
<div class="container">

View File

@@ -81,6 +81,8 @@
{% endif %}
{% if current_user.is_authenticated %}
{{ dropdown('Document Mgmt', 'note_stack', [
{'name': 'Add Catalog', 'url': '/document/catalog', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'All Catalogs', 'url': '/document/catalogs', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']},
@@ -114,6 +116,17 @@
{% endif %}
</ul>
{% if current_user.is_authenticated %}
<ul class="navbar-nav d-lg-block d-none">
<li class="nav-item">
<a href="/document/catalogs" class="btn btn-sm bg-gradient-primary mb-0 me-2">
{% if 'catalog_name' in session %}
CATALOG: {{ session['catalog_name'] }}
{% else %}
CHOOSE CATALOG
{% endif %}
</a>
</li>
</ul>
<ul class="navbar-nav d-lg-block d-none">
<li class="nav-item">
<a href="/session_defaults" class="btn btn-sm bg-gradient-primary mb-0">

View File

@@ -10,7 +10,7 @@
<form method="post">
{{ form.hidden_tag() }}
<!-- Main Tenant Information -->
{% set main_fields = ['name', 'website', 'default_language', 'allowed_languages', 'rag_context', 'type'] %}
{% set main_fields = ['name', 'website', 'default_language', 'allowed_languages', 'timezone','rag_context', 'type'] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=[], include_fields=main_fields) }}
{% endfor %}
@@ -30,21 +30,6 @@
License Information
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#chunking-tab" role="tab" aria-controls="chunking" aria-selected="false">
Chunking
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#embedding-search-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
Embedding Search
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#tuning-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
Tuning
</a>
</li>
</ul>
</div>
<div class="tab-content tab-space">
@@ -62,8 +47,10 @@
{{ render_included_field(field, disabled_fields=[], include_fields=license_fields) }}
{% endfor %}
<!-- Register API Key Button -->
<button type="button" class="btn btn-primary" onclick="generateNewChatApiKey()">Register Chat API Key</button>
<button type="button" class="btn btn-primary" onclick="generateNewApiKey()">Register API Key</button>
<div class="form-group">
<button type="button" class="btn btn-primary" onclick="generateNewChatApiKey()">Register Chat API Key</button>
<button type="button" class="btn btn-primary" onclick="generateNewApiKey()">Register API Key</button>
</div>
<!-- API Key Display Field -->
<div id="chat-api-key-field" style="display:none;">
<label for="chat-api-key">Chat API Key:</label>
@@ -78,27 +65,6 @@
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
</div>
</div>
<!-- Chunking Settings Tab -->
<div class="tab-pane fade" id="chunking-tab" role="tabpanel">
{% set html_fields = ['html_tags', 'html_end_tags', 'html_included_elements', 'html_excluded_elements', 'html_excluded_classes', 'min_chunk_size', 'max_chunk_size'] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=[], include_fields=html_fields) }}
{% endfor %}
</div>
<!-- Embedding Search Settings Tab -->
<div class="tab-pane fade" id="embedding-search-tab" role="tabpanel">
{% set es_fields = ['es_k', 'es_similarity_threshold', ] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=[], include_fields=es_fields) }}
{% endfor %}
</div>
<!-- Tuning Settings Tab -->
<div class="tab-pane fade" id="tuning-tab" role="tabpanel">
{% set tuning_fields = ['embed_tuning', 'rag_tuning', ] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=[], include_fields=tuning_fields) }}
{% endfor %}
</div>
</div>
</div>
</div>

View File

@@ -30,21 +30,6 @@
License Information
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#chunking-tab" role="tab" aria-controls="chunking" aria-selected="false">
Chunking
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#embedding-search-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
Embedding Search
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#tuning-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
Tuning
</a>
</li>
</ul>
</div>
<div class="tab-content tab-space">
@@ -78,27 +63,6 @@
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
</div>
</div>
<!-- Chunking Settings Tab -->
<div class="tab-pane fade" id="chunking-tab" role="tabpanel">
{% set html_fields = ['html_tags', 'html_end_tags', 'html_included_elements', 'html_excluded_elements', 'html_excluded_classes', 'min_chunk_size', 'max_chunk_size'] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=html_fields, include_fields=html_fields) }}
{% endfor %}
</div>
<!-- Embedding Search Settings Tab -->
<div class="tab-pane fade" id="embedding-search-tab" role="tabpanel">
{% set es_fields = ['es_k', 'es_similarity_threshold', ] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=es_fields, include_fields=es_fields) }}
{% endfor %}
</div>
<!-- Tuning Settings Tab -->
<div class="tab-pane fade" id="tuning-tab" role="tabpanel">
{% set tuning_fields = ['embed_tuning', 'rag_tuning', ] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=tuning_fields, include_fields=tuning_fields) }}
{% endfor %}
</div>
</div>
</div>
</div>

View File

@@ -1,8 +1,8 @@
from flask import session, current_app
from flask_wtf import FlaskForm
from wtforms import (StringField, BooleanField, SubmitField, DateField,
from wtforms import (StringField, BooleanField, SubmitField, DateField, IntegerField, FloatField, SelectMultipleField,
SelectField, FieldList, FormField, TextAreaField, URLField)
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError, NumberRange
from flask_wtf.file import FileField, FileAllowed, FileRequired
import json
@@ -23,6 +23,36 @@ def validate_json(form, field):
raise ValidationError('Invalid JSON format')
class CatalogForm(FlaskForm):
    """Form for registering and editing a Catalog.

    The HTML-related fields are entered as comma-separated text; the catalog
    views split them on commas before storing them on the Catalog.
    """
    name = StringField('Name', validators=[DataRequired(), Length(max=50)])
    description = TextAreaField('Description', validators=[Optional()])

    # HTML Embedding Variables
    # The default used to read 'li, , tbody', which yields an empty tag name once
    # the value is split on commas; the empty entry is removed here.
    # NOTE(review): the gap may have been a dropped 'table' - confirm intended tag list.
    html_tags = StringField('HTML Tags', validators=[DataRequired()],
                            default='p, h1, h2, h3, h4, h5, h6, li, tbody, tr, td')
    html_end_tags = StringField('HTML End Tags', validators=[DataRequired()],
                                default='p, li')
    html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
    html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
    html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])
    min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()],
                                  default=2000)
    max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
                                  default=3000)

    # Embedding Search variables
    es_k = IntegerField('Limit for Searching Embeddings (5)',
                        default=5,
                        validators=[NumberRange(min=0)])
    es_similarity_threshold = FloatField('Similarity Threshold for Searching Embeddings (0.5)',
                                         default=0.5,
                                         validators=[NumberRange(min=0, max=1)])

    # Chat Variables
    chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)])
    chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)])

    # Tuning variables
    embed_tuning = BooleanField('Enable Embedding Tuning', default=False)
    rag_tuning = BooleanField('Enable RAG Tuning', default=False)
class AddDocumentForm(FlaskForm):
file = FileField('File', validators=[FileRequired(), allowed_file])
name = StringField('Name', validators=[Length(max=100)])

View File

@@ -0,0 +1,102 @@
from datetime import datetime
from flask import request, render_template, session
from sqlalchemy import desc, asc, or_, and_, cast, Integer
from common.models.document import Document, Catalog
from common.utils.filtered_list_view import FilteredListView
from common.utils.view_assistants import prepare_table_for_macro
class DocumentListView(FilteredListView):
    """List view over documents joined with their catalog.

    Supports filtering on catalog and validity, sorting on the columns named
    in ``allowed_sorts``, and pagination through the base class.
    """
    allowed_filters = ['catalog_id', 'validity']
    allowed_sorts = ['id', 'name', 'catalog_name', 'valid_from', 'valid_to']

    def get_query(self):
        """Return the base query: documents joined to catalogs, with the displayed columns added."""
        return Document.query.join(Catalog).add_columns(
            Document.id,
            Document.name,
            Catalog.name.label('catalog_name'),
            Document.valid_from,
            Document.valid_to
        )

    def apply_filters(self, query):
        """Narrow ``query`` using the ``catalog_id`` and ``validity`` request arguments."""
        # Function-scope import: the module only imports ``datetime`` from ``datetime``.
        from datetime import timezone

        filters = request.args.to_dict(flat=False)

        # Keep only values int() can parse. str.isdigit() also accepts characters
        # such as superscript digits, which make int() raise ValueError;
        # str.isdecimal() accepts exactly the decimal digits int() understands.
        catalog_ids = [int(cid) for cid in filters.get('catalog_id', []) if cid.isdecimal()]
        if catalog_ids:
            query = query.filter(Document.catalog_id.in_(catalog_ids))

        if 'valid' in filters.get('validity', []):
            # Current UTC date; datetime.utcnow() is deprecated in recent Python versions.
            now = datetime.now(timezone.utc).date()
            # A missing bound counts as open-ended on that side.
            query = query.filter(
                and_(
                    or_(Document.valid_from.is_(None), Document.valid_from <= now),
                    or_(Document.valid_to.is_(None), Document.valid_to >= now)
                )
            )

        return query

    def apply_sorting(self, query):
        """Order ``query`` by the requested column; unknown columns or orders leave it unsorted."""
        sort_by = request.args.get('sort_by', 'id')
        sort_order = request.args.get('sort_order', 'asc')

        if sort_by in self.allowed_sorts:
            # catalog_name is a label on the joined Catalog table, not a Document attribute.
            if sort_by == 'catalog_name':
                column = Catalog.name
            else:
                column = getattr(Document, sort_by)

            if sort_order == 'asc':
                query = query.order_by(asc(column))
            elif sort_order == 'desc':
                query = query.order_by(desc(column))

        return query

    @staticmethod
    def _format_date(value):
        """Return ``value`` as ``YYYY-MM-DD``; strings pass through, anything else becomes ''.

        Any object offering ``strftime`` is formatted, so plain ``date`` values
        are rendered as well as ``datetime`` values (the former were blanked).
        """
        if isinstance(value, str):
            return value
        if hasattr(value, 'strftime'):
            return value.strftime('%Y-%m-%d')
        return ''

    def get(self):
        """Build the filtered, sorted, paginated document table and render the template."""
        query = self.get_query()
        query = self.apply_filters(query)
        query = self.apply_sorting(query)
        pagination = self.paginate(query)

        # Cell dictionaries in the shape consumed by the table macro (it reads `value`).
        rows = [
            [
                {'value': item.id, 'class': '', 'type': 'text'},
                {'value': item.name, 'class': '', 'type': 'text'},
                {'value': item.catalog_name, 'class': '', 'type': 'text'},
                {'value': self._format_date(item.valid_from), 'class': '', 'type': 'text'},
                {'value': self._format_date(item.valid_to), 'class': '', 'type': 'text'}
            ] for item in pagination.items
        ]

        catalogs = Catalog.query.all()

        context = {
            'rows': rows,
            'pagination': pagination,
            'filters': request.args.to_dict(flat=False),
            'sort_by': request.args.get('sort_by', 'id'),
            'sort_order': request.args.get('sort_order', 'asc'),
            'filter_options': self.get_filter_options(catalogs)
        }

        return render_template(self.template, **context)

    def get_filter_options(self, catalogs):
        """Return the selectable options per filter as lists of ``(value, label)`` tuples."""
        return {
            'catalog_id': [(str(cat.id), cat.name) for cat in catalogs],
            'validity': [('valid', 'Valid'), ('all', 'All')]
        }

View File

@@ -12,7 +12,7 @@ class DocumentVersionListView(FilteredListView):
allowed_sorts = ['id', 'processing_started_at', 'processing_finished_at', 'processing_error']
def get_query(self):
return DocumentVersion.query.join(Document).filter(Document.tenant_id == session.get('tenant', {}).get('id'))
return DocumentVersion.query.join(Document)
def apply_filters(self, query):
filters = request.args.to_dict()

View File

@@ -1,9 +1,11 @@
import ast
from datetime import datetime as dt, timezone as tz
from babel.messages.setuptools_frontend import update_catalog
from flask import request, redirect, flash, render_template, Blueprint, session, current_app
from flask_security import roles_accepted, current_user
from sqlalchemy import desc
from sqlalchemy.orm import aliased
from werkzeug.utils import secure_filename
from sqlalchemy.exc import SQLAlchemyError
import requests
@@ -12,18 +14,20 @@ from urllib.parse import urlparse, unquote
import io
import json
from common.models.document import Document, DocumentVersion
from common.models.document import Document, DocumentVersion, Catalog
from common.extensions import db, minio_client
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
process_multiple_urls, get_documents_list, edit_document, \
edit_document_version, refresh_document
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
EveAIDoubleURLException
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm, \
CatalogForm
from common.utils.middleware import mw_before_request
from common.utils.celery_utils import current_celery
from common.utils.nginx_utils import prefixed_url_for
from common.utils.view_assistants import form_validation_failed, prepare_table_for_macro, form_to_dict
from .document_list_view import DocumentListView
from .document_version_list_view import DocumentVersionListView
document_bp = Blueprint('document_bp', __name__, url_prefix='/document')
@@ -52,6 +56,123 @@ def before_request():
raise
@document_bp.route('/catalog', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def catalog():
    """Show the catalog registration form and create a Catalog on a valid submit.

    The comma-separated HTML fields of the form are converted to lists before
    the catalog is stored. The form template is rendered in every case.
    """
    def split_csv(raw):
        """Split a comma-separated form value into stripped, non-empty entries."""
        return [part.strip() for part in raw.split(',') if part.strip()] if raw else []

    form = CatalogForm()

    if form.validate_on_submit():
        tenant_id = session.get('tenant').get('id')
        new_catalog = Catalog()
        form.populate_obj(new_catalog)
        # Handle Embedding Variables: populate_obj copied the raw strings, so replace
        # them with lists. Blank entries (e.g. from "a, , b") are dropped.
        new_catalog.html_tags = split_csv(form.html_tags.data)
        new_catalog.html_end_tags = split_csv(form.html_end_tags.data)
        new_catalog.html_included_elements = split_csv(form.html_included_elements.data)
        new_catalog.html_excluded_elements = split_csv(form.html_excluded_elements.data)
        new_catalog.html_excluded_classes = split_csv(form.html_excluded_classes.data)
        set_logging_information(new_catalog, dt.now(tz.utc))

        try:
            db.session.add(new_catalog)
            db.session.commit()
            flash('Catalog successfully added!', 'success')
            current_app.logger.info(f'Catalog {new_catalog.name} successfully added for tenant {tenant_id}!')
        except SQLAlchemyError as e:
            db.session.rollback()
            flash(f'Failed to add catalog. Error: {e}', 'danger')
            # The adjacent f-strings lacked a separating space ("<name>for tenant").
            current_app.logger.error(f'Failed to add catalog {new_catalog.name} '
                                     f'for tenant {tenant_id}. Error: {str(e)}')

    return render_template('document/catalog.html', form=form)
@document_bp.route('/catalogs', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def catalogs():
    """Render one page of the catalog overview, ordered by catalog id."""
    # Paging is driven by the query string; defaults are page 1 with 10 rows.
    requested_page = request.args.get('page', 1, type=int)
    page_size = request.args.get('per_page', 10, type=int)

    pagination = Catalog.query.order_by(Catalog.id).paginate(page=requested_page, per_page=page_size)

    # Convert the catalogs on this page into the row structure handed to the template.
    table_rows = prepare_table_for_macro(pagination.items, [('id', ''), ('name', '')])

    return render_template('document/catalogs.html', rows=table_rows, pagination=pagination)
@document_bp.route('/handle_catalog_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_catalog_selection():
    """Dispatch the action chosen for the catalog selected in the overview table.

    ``selected_row`` may be a plain integer id or the string form of a dict
    carrying the id under ``'value'``. A missing or unparsable selection flashes
    a message and returns to the overview instead of raising.
    """
    catalog_identification = request.form.get('selected_row')
    try:
        if catalog_identification is not None and catalog_identification.isdecimal():
            catalog_id = int(catalog_identification)
        else:
            # literal_eval only parses Python literals; it raises on None and on malformed text.
            catalog_id = ast.literal_eval(catalog_identification).get('value')
    except (ValueError, SyntaxError, TypeError, AttributeError):
        catalog_id = None

    if catalog_id is None:
        flash('Invalid catalog selection.', 'danger')
        return redirect(prefixed_url_for('document_bp.catalogs'))

    action = request.form['action']
    catalog = Catalog.query.get_or_404(catalog_id)

    if action == 'set_session_catalog':
        current_app.logger.info(f'Setting session catalog to {catalog.name}')
        session['catalog_id'] = catalog_id
        session['catalog_name'] = catalog.name
        current_app.logger.info(f'Finished setting session catalog to {catalog.name}')
    elif action == 'edit_catalog':
        return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))

    return redirect(prefixed_url_for('document_bp.catalogs'))
@document_bp.route('/catalog/<int:catalog_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_catalog(catalog_id):
    """Show and process the edit form for an existing catalog.

    On GET the list-valued HTML settings are shown as comma-separated text.
    On a valid POST they are converted back to lists, the catalog is saved and
    the user is redirected to the overview (also when the save fails, after a
    flash message). Responds with 404 when the catalog does not exist.
    """
    def split_csv(raw):
        """Split a comma-separated form value into stripped, non-empty entries."""
        return [part.strip() for part in raw.split(',') if part.strip()] if raw else []

    catalog = Catalog.query.get_or_404(catalog_id)
    form = CatalogForm(obj=catalog)
    tenant_id = session.get('tenant').get('id')

    # Convert arrays to comma-separated strings for display
    if request.method == 'GET':
        form.html_tags.data = ', '.join(catalog.html_tags or '')
        form.html_end_tags.data = ', '.join(catalog.html_end_tags or '')
        form.html_included_elements.data = ', '.join(catalog.html_included_elements or '')
        form.html_excluded_elements.data = ', '.join(catalog.html_excluded_elements or '')
        form.html_excluded_classes.data = ', '.join(catalog.html_excluded_classes or '')

    if request.method == 'POST' and form.validate_on_submit():
        form.populate_obj(catalog)
        # Handle Embedding Variables: populate_obj copied the raw strings, so replace
        # them with lists. Blank entries (e.g. from "a, , b") are dropped.
        catalog.html_tags = split_csv(form.html_tags.data)
        catalog.html_end_tags = split_csv(form.html_end_tags.data)
        catalog.html_included_elements = split_csv(form.html_included_elements.data)
        catalog.html_excluded_elements = split_csv(form.html_excluded_elements.data)
        catalog.html_excluded_classes = split_csv(form.html_excluded_classes.data)
        update_logging_information(catalog, dt.now(tz.utc))

        try:
            db.session.add(catalog)
            db.session.commit()
            # The message previously read "successfully updated successfully".
            flash('Catalog successfully updated!', 'success')
            current_app.logger.info(f'Catalog {catalog.name} successfully updated for tenant {tenant_id}')
        except SQLAlchemyError as e:
            db.session.rollback()
            flash(f'Failed to update catalog. Error: {e}', 'danger')
            current_app.logger.error(f'Failed to update catalog {catalog_id} for tenant {tenant_id}. Error: {str(e)}')

        return redirect(prefixed_url_for('document_bp.catalogs'))
    else:
        form_validation_failed(request, form)

    return render_template('document/edit_catalog.html', form=form, catalog_id=catalog_id)
@document_bp.route('/add_document', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_document():
@@ -60,6 +181,7 @@ def add_document():
if form.validate_on_submit():
try:
tenant_id = session['tenant']['id']
catalog_id = session['catalog_id']
file = form.file.data
filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower()
@@ -68,6 +190,7 @@ def add_document():
current_app.logger.debug(f'Language on form: {form.language.data}')
api_input = {
'catalog_id': catalog_id,
'name': form.name.data,
'language': form.language.data,
'user_context': form.user_context.data,
@@ -100,11 +223,13 @@ def add_url():
if form.validate_on_submit():
try:
tenant_id = session['tenant']['id']
catalog_id = session['catalog_id']
url = form.url.data
file_content, filename, extension = process_url(url, tenant_id)
api_input = {
'catalog_id': catalog_id,
'name': form.name.data or filename,
'url': url,
'language': form.language.data,
@@ -171,22 +296,23 @@ def add_urls():
@document_bp.route('/documents', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def documents():
page = request.args.get('page', 1, type=int)
per_page = request.args.get('per_page', 10, type=int)
pagination = get_documents_list(page, per_page)
docs = pagination.items
rows = prepare_table_for_macro(docs, [('id', ''), ('name', ''), ('valid_from', ''), ('valid_to', '')])
return render_template('document/documents.html', rows=rows, pagination=pagination)
view = DocumentListView(Document, 'document/documents.html', per_page=10)
return view.get()
@document_bp.route('/handle_document_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_document_selection():
document_identification = request.form['selected_row']
doc_id = ast.literal_eval(document_identification).get('value')
if isinstance(document_identification, int) or document_identification.isdigit():
doc_id = int(document_identification)
else:
# If it's not an integer, assume it's a string representation of a dictionary
try:
doc_id = ast.literal_eval(document_identification).get('value')
except (ValueError, AttributeError):
flash('Invalid document selection.', 'error')
return redirect(prefixed_url_for('document_bp.documents'))
action = request.form['action']
@@ -208,9 +334,25 @@ def handle_document_selection():
@document_bp.route('/edit_document/<int:document_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_document_view(document_id):
doc = Document.query.get_or_404(document_id)
# Use an alias for the Catalog to avoid column name conflicts
CatalogAlias = aliased(Catalog)
# Query for the document and its catalog
result = db.session.query(Document, CatalogAlias.name.label('catalog_name')) \
.join(CatalogAlias, Document.catalog_id == CatalogAlias.id) \
.filter(Document.id == document_id) \
.first_or_404()
doc, catalog_name = result
form = EditDocumentForm(obj=doc)
if request.method == 'GET':
# Populate form with current values
form.name.data = doc.name
form.valid_from.data = doc.valid_from
form.valid_to.data = doc.valid_to
if form.validate_on_submit():
updated_doc, error = edit_document(
document_id,
@@ -226,7 +368,7 @@ def edit_document_view(document_id):
else:
form_validation_failed(request, form)
return render_template('document/edit_document.html', form=form, document_id=document_id)
return render_template('document/edit_document.html', form=form, document_id=document_id, catalog_name=catalog_name)
@document_bp.route('/edit_document_version/<int:document_version_id>', methods=['GET', 'POST'])
@@ -280,7 +422,15 @@ def document_versions(document_id):
@roles_accepted('Super User', 'Tenant Admin')
def handle_document_version_selection():
document_version_identification = request.form['selected_row']
doc_vers_id = ast.literal_eval(document_version_identification).get('value')
if isinstance(document_version_identification, int) or document_version_identification.isdigit():
doc_vers_id = int(document_version_identification)
else:
# If it's not an integer, assume it's a string representation of a dictionary
try:
doc_vers_id = ast.literal_eval(document_version_identification).get('value')
except (ValueError, AttributeError):
flash('Invalid document version selection.', 'error')
return redirect(prefixed_url_for('document_bp.document_versions_list'))
action = request.form['action']
@@ -332,7 +482,7 @@ def refresh_all_documents():
def refresh_document_view(document_id):
new_version, result = refresh_document(document_id)
new_version, result = refresh_document(document_id, session['tenant']['id'])
if new_version:
flash(f'Document refreshed. New version: {new_version.id}. Task ID: {result}', 'success')
else:
@@ -397,47 +547,47 @@ def fetch_html(url):
return response.content
def prepare_document_data(docs):
rows = []
for doc in docs:
doc_row = [{'value': doc.name, 'class': '', 'type': 'text'},
{'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}]
# Document basic details
if doc.valid_from:
doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'})
else:
doc_row.append({'value': '', 'class': '', 'type': 'text'})
# Nested languages and versions
languages_rows = []
for lang in doc.languages:
lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}]
# Latest version details if available (should be available ;-) )
if lang.latest_version:
lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"),
'class': '', 'type': 'text'})
if lang.latest_version.url:
lang_row.append({'value': lang.latest_version.url,
'class': '', 'type': 'link', 'href': lang.latest_version.url})
else:
lang_row.append({'value': '', 'class': '', 'type': 'text'})
if lang.latest_version.file_name:
lang_row.append({'value': lang.latest_version.file_name, 'class': '', 'type': 'text'})
else:
lang_row.append({'value': '', 'class': '', 'type': 'text'})
if lang.latest_version.file_type:
lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'})
else:
lang_row.append({'value': '', 'class': '', 'type': 'text'})
# Include other details as necessary
languages_rows.append(lang_row)
doc_row.append({'is_group': True, 'colspan': '5',
'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
'sub_rows': languages_rows})
rows.append(doc_row)
return rows
# def prepare_document_data(docs):
# rows = []
# for doc in docs:
# doc_row = [{'value': doc.name, 'class': '', 'type': 'text'},
# {'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}]
# # Document basic details
# if doc.valid_from:
# doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'})
# else:
# doc_row.append({'value': '', 'class': '', 'type': 'text'})
#
# # Nested languages and versions
# languages_rows = []
# for lang in doc.languages:
# lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}]
#
# # Latest version details if available (should be available ;-) )
# if lang.latest_version:
# lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"),
# 'class': '', 'type': 'text'})
# if lang.latest_version.url:
# lang_row.append({'value': lang.latest_version.url,
# 'class': '', 'type': 'link', 'href': lang.latest_version.url})
# else:
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
#
# if lang.latest_version.object_name:
# lang_row.append({'value': lang.latest_version.object_name, 'class': '', 'type': 'text'})
# else:
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
#
# if lang.latest_version.file_type:
# lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'})
# else:
# lang_row.append({'value': '', 'class': '', 'type': 'text'})
# # Include other details as necessary
#
# languages_rows.append(lang_row)
#
# doc_row.append({'is_group': True, 'colspan': '5',
# 'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
# 'sub_rows': languages_rows})
# rows.append(doc_row)
# return rows

View File

@@ -93,17 +93,17 @@ def view_chat_session(chat_session_id):
# Fetch all related embeddings for the interactions in this session
embedding_query = (db.session.query(InteractionEmbedding.interaction_id,
DocumentVersion.url,
DocumentVersion.file_name)
DocumentVersion.object_name)
.join(Embedding, InteractionEmbedding.embedding_id == Embedding.id)
.join(DocumentVersion, Embedding.doc_vers_id == DocumentVersion.id)
.filter(InteractionEmbedding.interaction_id.in_([i.id for i in interactions])))
# Create a dictionary to store embeddings for each interaction
embeddings_dict = {}
for interaction_id, url, file_name in embedding_query:
for interaction_id, url, object_name in embedding_query:
if interaction_id not in embeddings_dict:
embeddings_dict[interaction_id] = []
embeddings_dict[interaction_id].append({'url': url, 'file_name': file_name})
embeddings_dict[interaction_id].append({'url': url, 'object_name': object_name})
return render_template('interaction/view_chat_session.html',
chat_session=chat_session,

View File

@@ -48,22 +48,6 @@ def tenant():
new_tenant = Tenant()
form.populate_obj(new_tenant)
# Handle Embedding Variables
new_tenant.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
new_tenant.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
if form.html_end_tags.data else []
new_tenant.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
if form.html_included_elements.data else []
new_tenant.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
if form.html_excluded_elements.data else []
new_tenant.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \
if form.html_excluded_classes.data else []
current_app.logger.debug(f'html_tags: {new_tenant.html_tags},'
f'html_end_tags: {new_tenant.html_end_tags},'
f'html_included_elements: {new_tenant.html_included_elements},'
f'html_excluded_elements: {new_tenant.html_excluded_elements}')
# Handle Timestamps
timestamp = dt.now(tz.utc)
new_tenant.created_at = timestamp
@@ -105,30 +89,11 @@ def edit_tenant(tenant_id):
if request.method == 'GET':
# Populate the form with tenant data
form.populate_obj(tenant)
if tenant.html_tags:
form.html_tags.data = ', '.join(tenant.html_tags)
if tenant.html_end_tags:
form.html_end_tags.data = ', '.join(tenant.html_end_tags)
if tenant.html_included_elements:
form.html_included_elements.data = ', '.join(tenant.html_included_elements)
if tenant.html_excluded_elements:
form.html_excluded_elements.data = ', '.join(tenant.html_excluded_elements)
if tenant.html_excluded_classes:
form.html_excluded_classes.data = ', '.join(tenant.html_excluded_classes)
if form.validate_on_submit():
current_app.logger.debug(f'Updating tenant {tenant_id}')
# Populate the tenant with form data
form.populate_obj(tenant)
# Then handle the special fields manually
tenant.html_tags = [tag.strip() for tag in form.html_tags.data.split(',') if tag.strip()]
tenant.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',') if tag.strip()]
tenant.html_included_elements = [elem.strip() for elem in form.html_included_elements.data.split(',') if
elem.strip()]
tenant.html_excluded_elements = [elem.strip() for elem in form.html_excluded_elements.data.split(',') if
elem.strip()]
tenant.html_excluded_classes = [elem.strip() for elem in form.html_excluded_classes.data.split(',') if
elem.strip()]
db.session.commit()
flash('Tenant updated successfully.', 'success')
@@ -266,10 +231,16 @@ def handle_tenant_selection():
tenant_identification = request.form['selected_row']
tenant_id = ast.literal_eval(tenant_identification).get('value')
the_tenant = Tenant.query.get(tenant_id)
# set tenant information in the session
session['tenant'] = the_tenant.to_dict()
session['default_language'] = the_tenant.default_language
session['embedding_model'] = the_tenant.embedding_model
session['llm_model'] = the_tenant.llm_model
# remove catalog-related items from the session
session.pop('catalog_id', None)
session.pop('catalog_name', None)
action = request.form['action']
match action:

View File

@@ -36,7 +36,7 @@ def ping():
def detail_question(question, language, model_variables, session_id):
current_app.logger.debug(f'Detail question: {question}')
current_app.logger.debug(f'model_varialbes: {model_variables}')
current_app.logger.debug(f'model_variables: {model_variables}')
current_app.logger.debug(f'session_id: {session_id}')
retriever = EveAIHistoryRetriever(model_variables=model_variables, session_id=session_id)
llm = model_variables['llm']
@@ -93,12 +93,6 @@ def ask_question(tenant_id, question, language, session_id, user_timezone, room)
current_app.logger.error(f'ask_question: Error initializing chat session in database: {e}')
raise
if tenant.rag_tuning:
current_app.rag_tuning_logger.debug(f'Received question for tenant {tenant_id}:\n{question}. Processing...')
current_app.rag_tuning_logger.debug(f'Tenant Information: \n{tenant.to_dict()}')
current_app.rag_tuning_logger.debug(f'===================================================================')
current_app.rag_tuning_logger.debug(f'===================================================================')
with current_event.create_span("RAG Answer"):
result, interaction = answer_using_tenant_rag(question, language, tenant, chat_session)
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
@@ -138,8 +132,7 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
with current_event.create_span("Detail Question"):
detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}')
if tenant.rag_tuning:
if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
new_interaction.detailed_question = detailed_question
@@ -153,7 +146,7 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
rag_prompt = ChatPromptTemplate.from_template(full_template)
setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
if tenant.rag_tuning:
if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
@@ -181,7 +174,7 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
current_app.logger.debug(f'ask_question: result answer: {result['answer']}')
current_app.logger.debug(f'ask_question: result citations: {result["citations"]}')
current_app.logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
if tenant.rag_tuning:
if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'ask_question: result answer: {result['answer']}')
current_app.rag_tuning_logger.debug(f'ask_question: result citations: {result["citations"]}')
current_app.rag_tuning_logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
@@ -197,7 +190,7 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
)
existing_embedding_ids = [emb.id for emb in embeddings]
urls = list(set(emb.document_version.url for emb in embeddings))
if tenant.rag_tuning:
if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Referenced documents for answer for tenant {tenant.id}:\n')
current_app.rag_tuning_logger.debug(f'{urls}')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')

View File

@@ -15,6 +15,7 @@ class HTMLProcessor(Processor):
self.html_end_tags = model_variables['html_end_tags']
self.html_included_elements = model_variables['html_included_elements']
self.html_excluded_elements = model_variables['html_excluded_elements']
self.html_excluded_classes = model_variables['html_excluded_classes']
self.chunk_size = model_variables['processing_chunk_size'] # Adjust this based on your LLM's optimal input size
self.chunk_overlap = model_variables[
'processing_chunk_overlap'] # Adjust for context preservation between chunks
@@ -45,7 +46,7 @@ class HTMLProcessor(Processor):
self._log(f'Parsing HTML for tenant {self.tenant.id}')
soup = BeautifulSoup(html_content, 'html.parser')
extracted_html = ''
excluded_classes = self._parse_excluded_classes(self.tenant.html_excluded_classes)
excluded_classes = self._parse_excluded_classes(self.html_excluded_classes)
if self.html_included_elements:
elements_to_parse = soup.find_all(self.html_included_elements)

View File

@@ -13,7 +13,7 @@ from langchain_core.runnables import RunnablePassthrough
from sqlalchemy.exc import SQLAlchemyError
from common.extensions import db, minio_client
from common.models.document import DocumentVersion, Embedding
from common.models.document import DocumentVersion, Embedding, Document
from common.models.user import Tenant
from common.utils.celery_utils import current_celery
from common.utils.database import Database
@@ -50,8 +50,12 @@ def create_embeddings(tenant_id, document_version_id):
if document_version is None:
raise Exception(f'Document version {document_version_id} not found')
# Retrieve the Catalog ID
doc = Document.query.get_or_404(document_version.doc_id)
catalog_id = doc.catalog_id
# Select variables to work with depending on tenant and model
model_variables = select_model_variables(tenant)
model_variables = select_model_variables(tenant, catalog_id=catalog_id)
current_app.logger.debug(f'Model variables: {model_variables}')
except Exception as e:

View File

@@ -51,6 +51,10 @@ No additional configuration is needed; the plugin will automatically detect the
## Versions
### 1.1.1 - Add Reinitialisation functionality
### 1.1.0 - Add Catalog Functionality
### 1.0.x - Bugfixing Releases
### 1.0.0 - Initial Release

View File

@@ -3,7 +3,7 @@
* Plugin Name: EveAI Sync
* Plugin URI: https://askeveai.com/
* Description: Synchronizes WordPress content with EveAI API.
* Version: 1.0.16
* Version: 1.1.1
* Author: Josako, Pieter Laroy
* Author URI: https://askeveai.com/about/
* License: GPL v2 or later
@@ -17,7 +17,7 @@ if (!defined('ABSPATH')) {
}
// Define plugin constants
define('EVEAI_SYNC_VERSION', '1.0.0');
define('EVEAI_SYNC_VERSION', '1.1.1');
define('EVEAI_SYNC_PLUGIN_DIR', plugin_dir_path(__FILE__));
define('EVEAI_SYNC_PLUGIN_URL', plugin_dir_url(__FILE__));
@@ -50,6 +50,30 @@ function eveai_delete_post_meta($post_id) {
}
add_action('before_delete_post', 'eveai_delete_post_meta');
/**
 * AJAX handler: remove all EveAI sync metadata from this WordPress site.
 *
 * Verifies the 'eveai_reinitialize_site' nonce and the 'manage_options'
 * capability, deletes every post meta row whose key starts with the literal
 * prefix "_eveai_", removes the EveAI sync options, and answers with a JSON
 * success or error payload.
 */
function eveai_reinitialize_site() {
    check_ajax_referer('eveai_reinitialize_site', 'nonce');

    if (!current_user_can('manage_options')) {
        wp_send_json_error('You do not have permission to perform this action.');
        return;
    }

    global $wpdb;

    // Remove all EveAI-related post meta.
    // "_" is a single-character wildcard in SQL LIKE, so the literal prefix is
    // escaped with esc_like(); otherwise keys that merely resemble
    // "<any char>eveai<any char>..." would be deleted as well.
    $like = $wpdb->esc_like('_eveai_') . '%';
    $wpdb->query(
        $wpdb->prepare("DELETE FROM {$wpdb->postmeta} WHERE meta_key LIKE %s", $like)
    );

    // Remove all EveAI-related options
    delete_option('eveai_last_sync_time');
    delete_option('eveai_sync_status');

    // Optionally, you might want to clear any custom tables if you have any

    wp_send_json_success('Site reinitialized. All EveAI metadata has been removed.');
}
add_action('wp_ajax_eveai_reinitialize_site', 'eveai_reinitialize_site');
// Display sync info in post
function eveai_display_sync_info($post) {
$document_id = get_post_meta($post->ID, '_eveai_document_id', true);

View File

@@ -16,6 +16,7 @@ class EveAI_Admin {
register_setting('eveai_settings', 'eveai_excluded_categories');
register_setting('eveai_settings', 'eveai_access_token');
register_setting('eveai_settings', 'eveai_token_expiry');
register_setting('eveai_settings', 'eveai_catalog_id');
}
public function add_admin_menu() {
@@ -50,6 +51,10 @@ class EveAI_Admin {
<th scope="row">API Key</th>
<td><input type="text" name="eveai_api_key" value="<?php echo esc_attr(get_option('eveai_api_key')); ?>" style="width: 100%;" /></td>
</tr>
<tr valign="top">
<th scope="row">Catalog ID</th>
<td><input type="text" name="eveai_catalog_id" value="<?php echo esc_attr(get_option('eveai_catalog_id')); ?>" style="width: 100%;" /></td>
</tr>
<tr valign="top">
<th scope="row">Default Language</th>
<td><input type="text" name="eveai_default_language" value="<?php echo esc_attr(get_option('eveai_default_language', 'en')); ?>" style="width: 100%;" /></td>
@@ -71,6 +76,11 @@ class EveAI_Admin {
<?php wp_nonce_field('eveai_bulk_sync', 'eveai_bulk_sync_nonce'); ?>
<input type="submit" name="eveai_bulk_sync" class="button button-primary" value="Start Bulk Sync">
</form>
<h2>Reinitialize Site</h2>
<p>Click the button below to remove all EveAI metadata from your site. This will reset the sync status for all posts and pages.</p>
<button id="eveai-reinitialize" class="button button-secondary">Reinitialize Site</button>
<div id="eveai-sync-results" style="margin-top: 20px;"></div>
</div>
<script>
@@ -105,6 +115,29 @@ class EveAI_Admin {
});
}
});
$('#eveai-reinitialize').on('click', function(e) {
e.preventDefault();
if (confirm('Are you sure you want to reinitialize? This will remove all EveAI metadata from your site.')) {
$.ajax({
url: ajaxurl,
type: 'POST',
data: {
action: 'eveai_reinitialize_site',
nonce: '<?php echo wp_create_nonce('eveai_reinitialize_site'); ?>'
},
success: function(response) {
if (response.success) {
alert(response.data);
} else {
alert('Error: ' + response.data);
}
},
error: function() {
alert('An error occurred. Please try again.');
}
});
}
});
});
</script>
<?php

View File

@@ -13,6 +13,7 @@ class EveAI_API {
$this->api_key = get_option('eveai_api_key');
$this->access_token = get_option('eveai_access_token');
$this->token_expiry = get_option('eveai_token_expiry', 0);
$this->catalog_id = get_option('eveai_catalog_id');
}
private function ensure_valid_token() {
@@ -111,6 +112,7 @@ class EveAI_API {
}
/**
 * Register a URL as a document through the EveAI API.
 *
 * The 'catalog_id' entry of $data is always overwritten with the value of
 * the 'eveai_catalog_id' option, read at call time.
 *
 * @param array $data Request payload for the add_url endpoint.
 * @return mixed Whatever make_request() returns for the POST to
 *               /api/v1/documents/add_url.
 */
public function add_url($data) {
$data['catalog_id'] = get_option('eveai_catalog_id'); // Include catalog_id
return $this->make_request('POST', '/api/v1/documents/add_url', $data);
}

View File

@@ -1,5 +1,7 @@
import inspect
import logging
import sys
import os
from logging.config import fileConfig
from flask import current_app
@@ -19,8 +21,26 @@ config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)
logger = logging.getLogger('alembic.env')
# fileConfig(config.config_file_name)
# logger = logging.getLogger('alembic.env')
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger()
# Reset handlers to avoid issues with Alembic overriding them.
# Iterate over a copy: removeHandler() mutates logger.handlers, and removing
# from the list being iterated would skip every other handler.
for handler in list(logger.handlers):
    logger.removeHandler(handler)
# Add a stream handler to output logs to the console
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(console_handler)
logger.setLevel(logging.INFO)
def get_engine():
@@ -125,6 +145,7 @@ def run_migrations_online():
tenants = get_tenant_ids()
for tenant in tenants:
try:
os.environ['TENANT_ID'] = str(tenant)
logger.info(f"Migrating tenant: {tenant}")
# set search path on the connection, which ensures that
# PostgreSQL will emit all CREATE / ALTER / DROP statements

View File

@@ -0,0 +1,56 @@
"""Adding Catalogs to Document Domain
Revision ID: 0f5932ff3051
Revises: 5a75fb6da7b8
Create Date: 2024-10-14 15:20:45.058329
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '0f5932ff3051'
down_revision = '5a75fb6da7b8'
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``catalog`` table and link ``document`` rows to it.

    Adds a nullable ``catalog_id`` column to ``document`` with a foreign key
    to ``catalog.id``. The column is nullable, so existing documents are left
    without a catalog by this revision.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('catalog',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('name', sa.String(length=50), nullable=False),
    sa.Column('description', sa.Text(), nullable=True),
    sa.Column('html_tags', postgresql.ARRAY(sa.String(length=10)), nullable=True),
    sa.Column('html_end_tags', postgresql.ARRAY(sa.String(length=10)), nullable=True),
    sa.Column('html_included_elements', postgresql.ARRAY(sa.String(length=50)), nullable=True),
    sa.Column('html_excluded_elements', postgresql.ARRAY(sa.String(length=50)), nullable=True),
    sa.Column('html_excluded_classes', postgresql.ARRAY(sa.String(length=200)), nullable=True),
    sa.Column('min_chunk_size', sa.Integer(), nullable=True),
    sa.Column('max_chunk_size', sa.Integer(), nullable=True),
    sa.Column('es_k', sa.Integer(), nullable=True),
    sa.Column('es_similarity_threshold', sa.Float(), nullable=True),
    sa.Column('chat_RAG_temperature', sa.Float(), nullable=True),
    sa.Column('chat_no_RAG_temperature', sa.Float(), nullable=True),
    sa.Column('embed_tuning', sa.Boolean(), nullable=True),
    sa.Column('rag_tuning', sa.Boolean(), nullable=True),
    sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
    sa.Column('created_by', sa.Integer(), nullable=True),
    sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
    sa.Column('updated_by', sa.Integer(), nullable=True),
    # Audit columns reference the user table in the public schema.
    sa.ForeignKeyConstraint(['created_by'], ['public.user.id'], ),
    sa.ForeignKeyConstraint(['updated_by'], ['public.user.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    op.add_column('document', sa.Column('catalog_id', sa.Integer(), nullable=True))
    # The constraint name is None, so no explicit name is given for the foreign key.
    op.create_foreign_key(None, 'document', 'catalog', ['catalog_id'], ['id'])
    # ### end Alembic commands ###
def downgrade():
    """Drop ``document.catalog_id`` and then the ``catalog`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    # NOTE(review): the foreign key created in upgrade() is not dropped
    # explicitly; presumably it goes away together with the column — confirm.
    op.drop_column('document', 'catalog_id')
    op.drop_table('catalog')
    # ### end Alembic commands ###

View File

@@ -0,0 +1,99 @@
"""Upgrading Existing documents to default catalog
Revision ID: 28984b05d396
Revises: 0f5932ff3051
Create Date: 2024-10-15 09:02:23.355660
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session
import pgvector
from common.models.document import Catalog, Document
from common.models.user import Tenant
from flask import current_app
import logging
import os
# Set up logging for this revision module. This runs at import time, so the
# "Starting revision upgrade" line below is emitted whenever the module is loaded.
# NOTE(review): basicConfig() does nothing when the root logger already has
# handlers — confirm whether the migration environment configures it first.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Create a console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
# Create a logging format
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
# Add the handler to the logger
# NOTE(review): this logger still propagates to the root logger, so each
# message may be printed twice when the root logger has a handler — verify.
logger.addHandler(console_handler)
logger.info("Starting revision upgrade 28984b05d396")
# revision identifiers, used by Alembic.
revision = '28984b05d396'
down_revision = '0f5932ff3051'
branch_labels = None
depends_on = None
def upgrade():
    """Create a 'Default Catalog' for the current tenant and attach all documents to it.

    The tenant is identified by the ``TENANT_ID`` environment variable. The new
    catalog copies the tenant's HTML-processing, chunking, search and tuning
    settings; every existing document then gets its ``catalog_id`` set to the
    new catalog.

    Raises:
        RuntimeError: if ``TENANT_ID`` is not set in the environment.
        Exception: any other failure is logged, the session is rolled back and
            the error is re-raised so the migration run fails.
    """
    logger.info("Starting migration: Creating default catalog and updating documents.")
    bind = op.get_bind()
    session = Session(bind=bind)

    # Settings copied one-to-one from the tenant onto the default catalog.
    copied_settings = (
        'html_tags',
        'html_end_tags',
        'html_included_elements',
        'html_excluded_elements',
        'html_excluded_classes',
        'min_chunk_size',
        'max_chunk_size',
        'es_k',
        'es_similarity_threshold',
        'chat_RAG_temperature',
        'chat_no_RAG_temperature',
        'embed_tuning',
        'rag_tuning',
    )

    try:
        raw_tenant_id = os.getenv('TENANT_ID')
        if raw_tenant_id is None:
            # Fail with a clear message instead of an opaque int(None) TypeError.
            raise RuntimeError("TENANT_ID environment variable is not set; "
                               "cannot determine which tenant to migrate.")
        tenant_id = int(raw_tenant_id)
        logger.info(f"In migration: tenant_id = {tenant_id}")

        # Step 1: Create a new Default Catalog
        logger.info("Creating default catalog")
        default_catalog = Catalog(
            name='Default Catalog',
            description=None,
        )
        tenant = Tenant.query.get_or_404(tenant_id)
        for setting in copied_settings:
            setattr(default_catalog, setting, getattr(tenant, setting))
        session.add(default_catalog)
        # flush() sends the INSERT so the primary key is available, without
        # committing: catalog creation and document update succeed or fail together.
        session.flush()

        new_catalog_id = default_catalog.id
        logger.info(f"Default catalog created with ID: {new_catalog_id}")

        # Step 2: Update all documents to use this new catalog
        logger.info("Updating documents with the new catalog ID.")
        documents = session.query(Document).all()
        for document in documents:
            document.catalog_id = new_catalog_id
        session.commit()
        logger.info(f"Updated {len(documents)} documents with new catalog ID.")
    except Exception as e:
        logger.error(f"An error occurred during migration: {e}")
        session.rollback()
        raise
    else:
        # Only report success when no exception was raised.
        logger.info("Migration completed successfully.")
    finally:
        session.close()
def downgrade():
    """Do nothing: this data-only revision is not reverted.

    The default catalog row and the documents' ``catalog_id`` values written
    by ``upgrade()`` are left in place.
    """
    pass

View File

@@ -26,55 +26,55 @@ def upgrade():
op.add_column('document_version', sa.Column('object_name', sa.String(length=200), nullable=True))
op.add_column('document_version', sa.Column('file_size', sa.Float(), nullable=True))
# ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
from common.models.document import DocumentVersion
from common.extensions import minio_client
from minio.error import S3Error
# Create a connection
connection = op.get_bind()
session = Session(bind=connection)
# Get the current schema name (which should be the tenant ID)
current_schema = connection.execute(text("SELECT current_schema()")).scalar()
tenant_id = int(current_schema)
doc_versions = session.query(DocumentVersion).all()
for doc_version in doc_versions:
try:
object_name = minio_client.generate_object_name(doc_version.doc_id,
doc_version.language,
doc_version.id,
doc_version.file_name)
bucket_name = minio_client.generate_bucket_name(tenant_id)
doc_version.object_name = object_name
doc_version.bucket_name = bucket_name
try:
stat = minio_client.client.stat_object(
bucket_name=bucket_name,
object_name=object_name
)
doc_version.file_size = stat.size / 1048576
current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
except S3Error as e:
if e.code == "NoSuchKey":
current_app.logger.warning(
f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.")
continue # Move to the next item
else:
raise e # Handle other types of S3 errors
except Exception as e:
session.rollback()
current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
f"Tenant {tenant_id}. Error: {str(e)}")
try:
session.commit()
current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
except Exception as e:
session.rollback()
current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
# # ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
# from common.models.document import DocumentVersion
# from common.extensions import minio_client
# from minio.error import S3Error
#
# # Create a connection
# connection = op.get_bind()
# session = Session(bind=connection)
#
# # Get the current schema name (which should be the tenant ID)
# current_schema = connection.execute(text("SELECT current_schema()")).scalar()
# tenant_id = int(current_schema)
#
# doc_versions = session.query(DocumentVersion).all()
# for doc_version in doc_versions:
# try:
# object_name = minio_client.generate_object_name(doc_version.doc_id,
# doc_version.language,
# doc_version.id,
# doc_version.file_name)
# bucket_name = minio_client.generate_bucket_name(tenant_id)
# doc_version.object_name = object_name
# doc_version.bucket_name = bucket_name
#
# try:
# stat = minio_client.client.stat_object(
# bucket_name=bucket_name,
# object_name=object_name
# )
# doc_version.file_size = stat.size / 1048576
# current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
# except S3Error as e:
# if e.code == "NoSuchKey":
# current_app.logger.warning(
# f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.")
# continue # Move to the next item
# else:
# raise e # Handle other types of S3 errors
# except Exception as e:
# session.rollback()
# current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
# f"Tenant {tenant_id}. Error: {str(e)}")
#
# try:
# session.commit()
# current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
# except Exception as e:
# session.rollback()
# current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
# ### commands auto generated by Alembic - Remove old fields ###
# op.drop_column('document_version', 'file_location')

View File

@@ -17,7 +17,7 @@
document.addEventListener('DOMContentLoaded', function() {
const eveAI = new EveAI(
'2',
'EveAI-CHAT-9079-8604-7441-6496-7604',
'EveAI-CHAT-6525-5692-6412-5828-7546',
'http://macstudio.ask-eve-ai-local.com',
'en',
'en,fr,nl',

42
scripts/db_squash.sh Executable file
View File

@@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Create consolidated Alembic revisions for both migration directories
# (migrations/public and migrations/tenant) using one shared message.
#
# Usage: ./scripts/db_squash.sh -m "Your migration message"
# Run from the project root: .env and the migrations/ paths are resolved
# relative to the current working directory.

usage() {
    echo "Usage: $0 -m \"Your migration message\""
}

# Load environment variables (including database connection details) and
# export them so the flask subprocesses inherit them.
set -a
source .env
set +a

project_dir=$(pwd)

# Set Flask app and Python path
export FLASK_APP=scripts/run_eveai_app.py
export PYTHONPATH="$PYTHONPATH:$project_dir"

while getopts m: flag
do
    case "${flag}" in
        m) message=${OPTARG};;
        *)
            # getopts has already printed the offending option.
            usage
            exit 1
            ;;
    esac
done

# Check that the message is provided
if [ -z "$message" ]; then
    echo "Message is required."
    usage
    exit 1
fi

# Run the alembic commands; stop if the public revision cannot be created so
# the two migration trees are not left half-consolidated.
flask db revision --autogenerate -m "Consolidated Public Migrations - $message" -d migrations/public || exit 1
flask db revision --autogenerate -m "Consolidated Tenant Migrations - $message" -d migrations/tenant

View File

@@ -32,10 +32,12 @@ export PYTHONPATH="$PROJECT_DIR/patched_packages:$PYTHONPATH:$PROJECT_DIR" # In
# Run Alembic upgrade for the public schema
echo "Applying migrations to the public schema..."
flask db upgrade -d "${PROJECT_DIR}/migrations/public"
echo "Finished applying migrations to the public schema..."
# Run Alembic upgrade for the tenant schema
echo "Applying migrations to the tenant schema..."
flask db upgrade -d "${PROJECT_DIR}/migrations/tenant"
echo "Finished applying migrations to the tenant schema..."
# Set flask environment variables
#export FLASK_ENV=development # Use 'production' as appropriate