17 Commits

Author SHA1 Message Date
Josako
88f4db1178 - Organise retrievers 2024-11-01 11:19:55 +01:00
Josako
2df291ea91 - Organise retrievers 2024-11-01 11:19:34 +01:00
Josako
5841525b4c - When no explicit path is given in the browser, we automatically get redirected to the admin interface (eveai_app)
- Tuning moved to the Retriever instead of the configuration, as this is an attribute that should be available for all types of Retrievers
2024-10-31 08:32:02 +01:00
Josako
532073d38e - Add dynamic fields to DocumentVersion in case the Catalog requires it. 2024-10-30 13:52:18 +01:00
Josako
43547287b1 - Refining & Enhancing dynamic fields
- Creating a specialized Form class for handling dynamic fields
- Refinement of HTML-macros to handle dynamic fields
- Introduction of dynamic fields for Catalogs
2024-10-29 09:17:44 +01:00
Josako
aa358df28e - Allowing for multiple types of Catalogs
- Introduction of retrievers
- Ensuring processing information is collected from the Catalog instead of the Tenant
- Introduction of a generic Form class to enable dynamic fields based on a configuration
- Realisation of Retriever functionality to support dynamic fields
2024-10-25 14:11:47 +02:00
Josako
30fec27488 - Release script added to tag in both git and docker 2024-10-21 07:45:06 +02:00
Josako
5e77b478dd - Release script added to tag in both git and docker 2024-10-17 11:22:18 +02:00
Josako
6f71259822 - Changelog update 2024-10-17 10:35:51 +02:00
Josako
74cc7ae95e - Adapt Sync Wordpress Component to Catalog introduction
- Small bug fixes
2024-10-17 10:31:13 +02:00
Josako
7f12c8b355 - Remove obsolete fields from Tenant model (Catalog introduction) 2024-10-16 13:59:57 +02:00
Josako
6069f5f7e5 - Catalog functionality integrated into document and document_version views
- small bugfixes and improvements
2024-10-16 13:09:19 +02:00
Josako
3e644f1652 - Add Catalog Functionality 2024-10-15 18:14:57 +02:00
Josako
3316a8bc47 - Small changes to show when upgrades are finished 2024-10-14 16:40:56 +02:00
Josako
270479c77d - Add Catalog Concept to Document Domain
- Create Catalog views
- Modify document stack creation
2024-10-14 13:56:23 +02:00
Josako
0f4558d775 - Small fix in interaction view, as it still referred to file_name 2024-10-11 18:14:35 +02:00
Josako
9f5f090f0c - License Usage Calculation realised
- View License Usages
- Celery Beat container added
- First schedule in Celery Beat for calculating usage (hourly)
- repopack can now split for different components
- Various fixes as consequence of changing file_location / file_name ==> bucket_name / object_name
- Celery Routing / Queuing updated
2024-10-11 16:33:36 +02:00
110 changed files with 3375 additions and 628 deletions

.gitignore vendored

@@ -42,3 +42,4 @@ migrations/public/.DS_Store
scripts/.DS_Store scripts/.DS_Store
scripts/__pycache__/run_eveai_app.cpython-312.pyc scripts/__pycache__/run_eveai_app.cpython-312.pyc
/eveai_repo.txt /eveai_repo.txt
*repo.txt


@@ -15,7 +15,6 @@ migrations/
nginx/mime.types nginx/mime.types
*.gitignore* *.gitignore*
.python-version .python-version
.repopackignore .repopackignore*
repopack.config.json repopack.config.json
*repo.txt


@@ -0,0 +1,12 @@
docker/
eveai_api/
eveai_app/
eveai_beat/
eveai_chat/
eveai_chat_workers/
eveai_entitlements/
eveai_workers/
instance/
integrations/
nginx/
scripts/

.repopackignore_docker Normal file

@@ -0,0 +1,12 @@
common/
config/
eveai_api/
eveai_app/
eveai_beat/
eveai_chat/
eveai_chat_workers/
eveai_entitlements/
eveai_workers/
instance/
integrations/
nginx/

.repopackignore_eveai_api Normal file

@@ -0,0 +1,11 @@
docker/
eveai_app/
eveai_beat/
eveai_chat/
eveai_chat_workers/
eveai_entitlements/
eveai_workers/
instance/
integrations/
nginx/
scripts/

.repopackignore_eveai_app Normal file

@@ -0,0 +1,11 @@
docker/
eveai_api/
eveai_beat/
eveai_chat/
eveai_chat_workers/
eveai_entitlements/
eveai_workers/
instance/
integrations/
nginx/
scripts/


@@ -0,0 +1,11 @@
docker/
eveai_api/
eveai_app/
eveai_chat/
eveai_chat_workers/
eveai_entitlements/
eveai_workers/
instance/
integrations/
nginx/
scripts/


@@ -0,0 +1,11 @@
docker/
eveai_api/
eveai_app/
eveai_beat/
eveai_chat_workers/
eveai_entitlements/
eveai_workers/
instance/
integrations/
nginx/
scripts/


@@ -0,0 +1,11 @@
docker/
eveai_api/
eveai_app/
eveai_beat/
eveai_chat/
eveai_entitlements/
eveai_workers/
instance/
integrations/
nginx/
scripts/


@@ -0,0 +1,11 @@
docker/
eveai_api/
eveai_app/
eveai_beat/
eveai_chat/
eveai_chat_workers/
eveai_workers/
instance/
integrations/
nginx/
scripts/


@@ -0,0 +1,11 @@
docker/
eveai_api/
eveai_app/
eveai_beat/
eveai_chat/
eveai_chat_workers/
eveai_entitlements/
instance/
integrations/
nginx/
scripts/

.repopackignore_full Normal file

@@ -0,0 +1,4 @@
docker
integrations
nginx
scripts


@@ -0,0 +1,13 @@
common/
config/
docker/
eveai_api/
eveai_app/
eveai_beat/
eveai_chat/
eveai_chat_workers/
eveai_entitlements/
eveai_workers/
instance/
nginx/
scripts/

.repopackignore_nginx Normal file

@@ -0,0 +1,11 @@
docker/
eveai_api/
eveai_app/
eveai_beat/
eveai_chat/
eveai_chat_workers/
eveai_entitlements/
eveai_workers/
instance/
integrations/
scripts/


@@ -25,6 +25,54 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Security ### Security
- In case of vulnerabilities. - In case of vulnerabilities.
## [1.0.13-alfa]
### Added
- Finished Catalog introduction
- Reinitialization of WordPress site for syncing
### Changed
- Modification of WordPress Sync Component
- Cleanup of attributes in Tenant
### Fixed
- Overall bugfixes as result from the Catalog introduction
## [1.0.12-alfa]
### Added
- Added Catalog functionality
### Changed
- For changes in existing functionality.
### Deprecated
- For soon-to-be removed features.
### Removed
- For now removed features.
### Fixed
- Set default language when registering Documents or URLs.
### Security
- In case of vulnerabilities.
## [1.0.11-alfa]
### Added
- License Usage Calculation realised
- View License Usages
- Celery Beat container added
- First schedule in Celery Beat for calculating usage (hourly)
### Changed
- repopack can now split for different components
### Fixed
- Various fixes as consequence of changing file_location / file_name ==> bucket_name / object_name
- Celery Routing / Queuing updated
## [1.0.10-alfa] ## [1.0.10-alfa]
### Added ### Added


@@ -11,16 +11,22 @@ from common.utils.datetime_utils import get_date_in_timezone
from common.utils.model_utils import ModelVariables from common.utils.model_utils import ModelVariables
class EveAIRetriever(BaseRetriever, BaseModel): class EveAIDefaultRagRetriever(BaseRetriever, BaseModel):
_catalog_id: int = PrivateAttr()
_model_variables: ModelVariables = PrivateAttr() _model_variables: ModelVariables = PrivateAttr()
_tenant_info: Dict[str, Any] = PrivateAttr() _tenant_info: Dict[str, Any] = PrivateAttr()
def __init__(self, model_variables: ModelVariables, tenant_info: Dict[str, Any]): def __init__(self, catalog_id: int, model_variables: ModelVariables, tenant_info: Dict[str, Any]):
super().__init__() super().__init__()
current_app.logger.debug(f'Model variables type: {type(model_variables)}') current_app.logger.debug(f'Model variables type: {type(model_variables)}')
self._catalog_id = catalog_id
self._model_variables = model_variables self._model_variables = model_variables
self._tenant_info = tenant_info self._tenant_info = tenant_info
@property
def catalog_id(self) -> int:
return self._catalog_id
@property @property
def model_variables(self) -> ModelVariables: def model_variables(self) -> ModelVariables:
return self._model_variables return self._model_variables
@@ -38,7 +44,7 @@ class EveAIRetriever(BaseRetriever, BaseModel):
similarity_threshold = self.model_variables['similarity_threshold'] similarity_threshold = self.model_variables['similarity_threshold']
k = self.model_variables['k'] k = self.model_variables['k']
if self.tenant_info['rag_tuning']: if self.model_variables['rag_tuning']:
try: try:
current_date = get_date_in_timezone(self.tenant_info['timezone']) current_date = get_date_in_timezone(self.tenant_info['timezone'])
current_app.rag_tuning_logger.debug(f'Current date: {current_date}\n') current_app.rag_tuning_logger.debug(f'Current date: {current_date}\n')
@@ -73,7 +79,7 @@ class EveAIRetriever(BaseRetriever, BaseModel):
current_app.logger.error(f'Error generating overview: {e}') current_app.logger.error(f'Error generating overview: {e}')
db.session.rollback() db.session.rollback()
if self.tenant_info['rag_tuning']: if self.model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Parameters for Retrieval of documents: \n') current_app.rag_tuning_logger.debug(f'Parameters for Retrieval of documents: \n')
current_app.rag_tuning_logger.debug(f'Similarity Threshold: {similarity_threshold}\n') current_app.rag_tuning_logger.debug(f'Similarity Threshold: {similarity_threshold}\n')
current_app.rag_tuning_logger.debug(f'K: {k}\n') current_app.rag_tuning_logger.debug(f'K: {k}\n')
@@ -100,20 +106,21 @@ class EveAIRetriever(BaseRetriever, BaseModel):
.filter( .filter(
or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date), or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date), or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date),
(1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold (1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold,
Document.catalog_id == self._catalog_id
) )
.order_by(desc('similarity')) .order_by(desc('similarity'))
.limit(k) .limit(k)
) )
if self.tenant_info['rag_tuning']: if self.model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Query executed for Retrieval of documents: \n') current_app.rag_tuning_logger.debug(f'Query executed for Retrieval of documents: \n')
current_app.rag_tuning_logger.debug(f'{query_obj.statement}\n') current_app.rag_tuning_logger.debug(f'{query_obj.statement}\n')
current_app.rag_tuning_logger.debug(f'---------------------------------------\n') current_app.rag_tuning_logger.debug(f'---------------------------------------\n')
res = query_obj.all() res = query_obj.all()
if self.tenant_info['rag_tuning']: if self.model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents \n') current_app.rag_tuning_logger.debug(f'Retrieved {len(res)} relevant documents \n')
current_app.rag_tuning_logger.debug(f'Data retrieved: \n') current_app.rag_tuning_logger.debug(f'Data retrieved: \n')
current_app.rag_tuning_logger.debug(f'{res}\n') current_app.rag_tuning_logger.debug(f'{res}\n')
@@ -121,7 +128,7 @@ class EveAIRetriever(BaseRetriever, BaseModel):
result = [] result = []
for doc in res: for doc in res:
if self.tenant_info['rag_tuning']: if self.model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Document ID: {doc[0].id} - Distance: {doc[1]}\n') current_app.rag_tuning_logger.debug(f'Document ID: {doc[0].id} - Distance: {doc[1]}\n')
current_app.rag_tuning_logger.debug(f'Chunk: \n {doc[0].chunk}\n\n') current_app.rag_tuning_logger.debug(f'Chunk: \n {doc[0].chunk}\n\n')
result.append(f'SOURCE: {doc[0].id}\n\n{doc[0].chunk}\n\n') result.append(f'SOURCE: {doc[0].id}\n\n{doc[0].chunk}\n\n')
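
A minimal usage sketch for the renamed retriever, assuming the calling worker has already loaded the Tenant and the Catalog. The tenant_info dict only needs the keys the retriever actually reads (notably 'timezone'), and tenant.timezone plus the question text are illustrative assumptions:

from common.utils.model_utils import select_model_variables

model_variables = select_model_variables(tenant, catalog_id=catalog.id)
tenant_info = {'timezone': tenant.timezone}  # assumed attribute; any dict exposing 'timezone' works

retriever = EveAIDefaultRagRetriever(
    catalog_id=catalog.id,
    model_variables=model_variables,
    tenant_info=tenant_info,
)
chunks = retriever.invoke("How do I reset my password?")  # LangChain BaseRetriever entry point; returns SOURCE-prefixed chunks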


@@ -0,0 +1,154 @@
from langchain_core.retrievers import BaseRetriever
from sqlalchemy import func, and_, or_, desc, cast, JSON
from sqlalchemy.exc import SQLAlchemyError
from pydantic import BaseModel, Field, PrivateAttr
from typing import Any, Dict, List, Optional
from flask import current_app
from contextlib import contextmanager
from common.extensions import db
from common.models.document import Document, DocumentVersion, Catalog
from common.utils.datetime_utils import get_date_in_timezone
from common.utils.model_utils import ModelVariables
class EveAIDossierRetriever(BaseRetriever, BaseModel):
_catalog_id: int = PrivateAttr()
_model_variables: ModelVariables = PrivateAttr()
_tenant_info: Dict[str, Any] = PrivateAttr()
_active_filters: Optional[Dict[str, Any]] = PrivateAttr()
def __init__(self, catalog_id: int, model_variables: ModelVariables, tenant_info: Dict[str, Any]):
super().__init__()
self._catalog_id = catalog_id
self._model_variables = model_variables
self._tenant_info = tenant_info
self._active_filters = None
@contextmanager
def filtering(self, metadata_filters: Dict[str, Any]):
"""Context manager for temporarily setting metadata filters"""
previous_filters = self._active_filters
self._active_filters = metadata_filters
try:
yield self
finally:
self._active_filters = previous_filters
def _build_metadata_filter_conditions(self, query):
"""Build SQL conditions for metadata filtering"""
if not self._active_filters:
return query
conditions = []
for field, value in self._active_filters.items():
if value is None:
continue
# Handle both single values and lists of values
if isinstance(value, (list, tuple)):
# Multiple values - create OR condition
or_conditions = []
for val in value:
or_conditions.append(
cast(DocumentVersion.user_metadata[field].astext, JSON) == str(val)
)
if or_conditions:
conditions.append(or_(*or_conditions))
else:
# Single value - direct comparison
conditions.append(
cast(DocumentVersion.user_metadata[field].astext, JSON) == str(value)
)
if conditions:
query = query.filter(and_(*conditions))
return query
def _get_relevant_documents(self, query: str):
current_app.logger.debug(f'Retrieving relevant documents for dossier query: {query}')
if self._active_filters:
current_app.logger.debug(f'Using metadata filters: {self._active_filters}')
query_embedding = self._get_query_embedding(query)
db_class = self.model_variables['embedding_db_model']
similarity_threshold = self.model_variables['similarity_threshold']
k = self.model_variables['k']
try:
current_date = get_date_in_timezone(self.tenant_info['timezone'])
# Subquery to find the latest version of each document
subquery = (
db.session.query(
DocumentVersion.doc_id,
func.max(DocumentVersion.id).label('latest_version_id')
)
.group_by(DocumentVersion.doc_id)
.subquery()
)
# Build base query
# Build base query
query_obj = (
db.session.query(db_class,
(1 - db_class.embedding.cosine_distance(query_embedding)).label('similarity'))
.join(DocumentVersion, db_class.doc_vers_id == DocumentVersion.id)
.join(Document, DocumentVersion.doc_id == Document.id)
.join(subquery, DocumentVersion.id == subquery.c.latest_version_id)
.filter(
or_(Document.valid_from.is_(None), func.date(Document.valid_from) <= current_date),
or_(Document.valid_to.is_(None), func.date(Document.valid_to) >= current_date),
(1 - db_class.embedding.cosine_distance(query_embedding)) > similarity_threshold,
Document.catalog_id == self._catalog_id
)
)
# Apply metadata filters
query_obj = self._build_metadata_filter_conditions(query_obj)
# Order and limit results
query_obj = query_obj.order_by(desc('similarity')).limit(k)
# Debug logging for RAG tuning if enabled
if self.model_variables['rag_tuning']:
self._log_rag_tuning(query_obj, query_embedding)
res = query_obj.all()
result = []
for doc in res:
if self.model_variables['rag_tuning']:
current_app.logger.debug(f'Document ID: {doc[0].id} - Distance: {doc[1]}\n')
current_app.logger.debug(f'Chunk: \n {doc[0].chunk}\n\n')
result.append(f'SOURCE: {doc[0].id}\n\n{doc[0].chunk}\n\n')
except SQLAlchemyError as e:
current_app.logger.error(f'Error retrieving relevant documents: {e}')
db.session.rollback()
return []
return result
def _log_rag_tuning(self, query_obj, query_embedding):
"""Log debug information for RAG tuning"""
current_app.rag_tuning_logger.debug("Debug: Query execution plan:")
current_app.rag_tuning_logger.debug(f"{query_obj.statement}")
if self._active_filters:
current_app.rag_tuning_logger.debug("Debug: Active metadata filters:")
current_app.rag_tuning_logger.debug(f"{self._active_filters}")
def _get_query_embedding(self, query: str):
"""Get embedding for the query text"""
embedding_model = self.model_variables['embedding_model']
query_embedding = embedding_model.embed_query(query)
return query_embedding
@property
def model_variables(self) -> ModelVariables:
return self._model_variables
@property
def tenant_info(self) -> Dict[str, Any]:
return self._tenant_info
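
A sketch of how the filtering context manager could scope retrieval to one dossier; the filter keys are hypothetical and must match fields stored in DocumentVersion.user_metadata:

retriever = EveAIDossierRetriever(
    catalog_id=catalog.id,
    model_variables=model_variables,
    tenant_info=tenant_info,
)

# Filters apply only inside the with-block and revert to the previous value afterwards.
with retriever.filtering({"dossier_id": 42, "status": ["open", "pending"]}):
    chunks = retriever._get_relevant_documents("latest correspondence about the claim")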


@@ -0,0 +1,40 @@
from pydantic import BaseModel, PrivateAttr
from typing import Dict, Any
from common.utils.model_utils import ModelVariables
class EveAIRetriever(BaseModel):
_catalog_id: int = PrivateAttr()
_user_metadata: Dict[str, Any] = PrivateAttr()
_system_metadata: Dict[str, Any] = PrivateAttr()
_configuration: Dict[str, Any] = PrivateAttr()
_tenant_info: Dict[str, Any] = PrivateAttr()
_model_variables: ModelVariables = PrivateAttr()
_tuning: bool = PrivateAttr()
def __init__(self, catalog_id: int, user_metadata: Dict[str, Any], system_metadata: Dict[str, Any],
configuration: Dict[str, Any]):
super().__init__()
self._catalog_id = catalog_id
self._user_metadata = user_metadata
self._system_metadata = system_metadata
self._configuration = configuration
@property
def catalog_id(self):
return self._catalog_id
@property
def user_metadata(self):
return self._user_metadata
@property
def system_metadata(self):
return self._system_metadata
@property
def configuration(self):
return self._configuration
# Any common methods that should be shared among retrievers can go here.
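
A sketch of constructing this generic wrapper from a Retriever database row; the row lookup and null-handling are illustrative:

retriever_row = Retriever.query.get_or_404(retriever_id)
generic = EveAIRetriever(
    catalog_id=retriever_row.catalog_id,
    user_metadata=retriever_row.user_metadata or {},
    system_metadata=retriever_row.system_metadata or {},
    configuration=retriever_row.configuration or {},
)
k = generic.configuration.get('es_k', 8)  # defaults mirror config/retriever_types.py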


@@ -2,12 +2,70 @@ from common.extensions import db
from .user import User, Tenant from .user import User, Tenant
from pgvector.sqlalchemy import Vector from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.postgresql import ARRAY
import sqlalchemy as sa
class Catalog(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(50), nullable=False)
description = db.Column(db.Text, nullable=True)
type = db.Column(db.String(50), nullable=False, default="DEFAULT_CATALOG")
# Embedding variables
html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
# Chat variables ==> Move to Specialist?
chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
# Tuning enablers
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
# Meta Data
user_metadata = db.Column(JSONB, nullable=True)
system_metadata = db.Column(JSONB, nullable=True)
configuration = db.Column(JSONB, nullable=True)
# Versioning Information
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Retriever(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String(50), nullable=False)
description = db.Column(db.Text, nullable=True)
catalog_id = db.Column(db.Integer, db.ForeignKey('catalog.id'), nullable=True)
type = db.Column(db.String(50), nullable=False, default="DEFAULT_RAG")
tuning = db.Column(db.Boolean, nullable=True, default=False)
# Meta Data
user_metadata = db.Column(JSONB, nullable=True)
system_metadata = db.Column(JSONB, nullable=True)
configuration = db.Column(JSONB, nullable=True)
# Versioning Information
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Document(db.Model): class Document(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
# tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
catalog_id = db.Column(db.Integer, db.ForeignKey(Catalog.id), nullable=True)
name = db.Column(db.String(100), nullable=False) name = db.Column(db.String(100), nullable=False)
tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
valid_from = db.Column(db.DateTime, nullable=True) valid_from = db.Column(db.DateTime, nullable=True)
valid_to = db.Column(db.DateTime, nullable=True) valid_to = db.Column(db.DateTime, nullable=True)
@@ -37,6 +95,7 @@ class DocumentVersion(db.Model):
system_context = db.Column(db.Text, nullable=True) system_context = db.Column(db.Text, nullable=True)
user_metadata = db.Column(JSONB, nullable=True) user_metadata = db.Column(JSONB, nullable=True)
system_metadata = db.Column(JSONB, nullable=True) system_metadata = db.Column(JSONB, nullable=True)
catalog_properties = db.Column(JSONB, nullable=True)
# Versioning Information # Versioning Information
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now()) created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
@@ -56,12 +115,6 @@ class DocumentVersion(db.Model):
def __repr__(self): def __repr__(self):
return f"<DocumentVersion {self.document_language.document_id}.{self.document_language.language}>.{self.id}>" return f"<DocumentVersion {self.document_language.document_id}.{self.document_language.language}>.{self.id}>"
def calc_file_location(self):
return f"{self.document.tenant_id}/{self.document.id}/{self.language}"
def calc_file_name(self):
return f"{self.id}.{self.file_type}"
class Embedding(db.Model): class Embedding(db.Model):
__tablename__ = 'embeddings' __tablename__ = 'embeddings'
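
A sketch of seeding a Catalog and attaching a Document to it, now that Document points at a Catalog rather than directly at a Tenant; the field values mirror the column defaults above:

catalog = Catalog(
    name='Main Library',
    description='Default catalog created during tenant onboarding',
    type='DEFAULT_CATALOG',
    min_chunk_size=2000,
    max_chunk_size=3000,
)
db.session.add(catalog)
db.session.commit()

doc = Document(name='Employee handbook', catalog_id=catalog.id)
db.session.add(doc)
db.session.commit()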


@@ -94,8 +94,8 @@ class LicenseUsage(db.Model):
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
license_id = db.Column(db.Integer, db.ForeignKey('public.license.id'), nullable=False) license_id = db.Column(db.Integer, db.ForeignKey('public.license.id'), nullable=False)
tenant_id = db.Column(db.Integer, db.ForeignKey('public.tenant.id'), nullable=False) tenant_id = db.Column(db.Integer, db.ForeignKey('public.tenant.id'), nullable=False)
storage_mb_used = db.Column(db.Integer, default=0) storage_mb_used = db.Column(db.Float, default=0)
embedding_mb_used = db.Column(db.Integer, default=0) embedding_mb_used = db.Column(db.Float, default=0)
embedding_prompt_tokens_used = db.Column(db.Integer, default=0) embedding_prompt_tokens_used = db.Column(db.Integer, default=0)
embedding_completion_tokens_used = db.Column(db.Integer, default=0) embedding_completion_tokens_used = db.Column(db.Integer, default=0)
embedding_total_tokens_used = db.Column(db.Integer, default=0) embedding_total_tokens_used = db.Column(db.Integer, default=0)


@@ -34,32 +34,32 @@ class Tenant(db.Model):
embedding_model = db.Column(db.String(50), nullable=True) embedding_model = db.Column(db.String(50), nullable=True)
llm_model = db.Column(db.String(50), nullable=True) llm_model = db.Column(db.String(50), nullable=True)
# Embedding variables # # Embedding variables ==> To be removed once all migrations (dev + prod) have been done
html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li']) # html_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li'])
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li']) # html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True) # html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True) # html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True) # html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)
#
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000) # min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000) # max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
#
# Embedding search variables # # Embedding search variables
es_k = db.Column(db.Integer, nullable=True, default=5) # es_k = db.Column(db.Integer, nullable=True, default=5)
es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7) # es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7)
#
# Chat variables # # Chat variables
chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3) # chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5) # chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
fallback_algorithms = db.Column(ARRAY(sa.String(50)), nullable=True) fallback_algorithms = db.Column(ARRAY(sa.String(50)), nullable=True)
# Licensing Information # Licensing Information
encrypted_chat_api_key = db.Column(db.String(500), nullable=True) encrypted_chat_api_key = db.Column(db.String(500), nullable=True)
encrypted_api_key = db.Column(db.String(500), nullable=True) encrypted_api_key = db.Column(db.String(500), nullable=True)
# Tuning enablers # # Tuning enablers
embed_tuning = db.Column(db.Boolean, nullable=True, default=False) # embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
rag_tuning = db.Column(db.Boolean, nullable=True, default=False) # rag_tuning = db.Column(db.Boolean, nullable=True, default=False)
# Entitlements # Entitlements
currency = db.Column(db.String(20), nullable=True) currency = db.Column(db.String(20), nullable=True)
@@ -96,20 +96,7 @@ class Tenant(db.Model):
'allowed_languages': self.allowed_languages, 'allowed_languages': self.allowed_languages,
'embedding_model': self.embedding_model, 'embedding_model': self.embedding_model,
'llm_model': self.llm_model, 'llm_model': self.llm_model,
'html_tags': self.html_tags,
'html_end_tags': self.html_end_tags,
'html_included_elements': self.html_included_elements,
'html_excluded_elements': self.html_excluded_elements,
'html_excluded_classes': self.html_excluded_classes,
'min_chunk_size': self.min_chunk_size,
'max_chunk_size': self.max_chunk_size,
'es_k': self.es_k,
'es_similarity_threshold': self.es_similarity_threshold,
'chat_RAG_temperature': self.chat_RAG_temperature,
'chat_no_RAG_temperature': self.chat_no_RAG_temperature,
'fallback_algorithms': self.fallback_algorithms, 'fallback_algorithms': self.fallback_algorithms,
'embed_tuning': self.embed_tuning,
'rag_tuning': self.rag_tuning,
'currency': self.currency, 'currency': self.currency,
'usage_email': self.usage_email, 'usage_email': self.usage_email,
} }


@@ -1,14 +1,16 @@
from celery import Celery from celery import Celery
from kombu import Queue from kombu import Queue
from werkzeug.local import LocalProxy from werkzeug.local import LocalProxy
from redbeat import RedBeatScheduler
celery_app = Celery() celery_app = Celery()
def init_celery(celery, app): def init_celery(celery, app, is_beat=False):
celery_app.main = app.name celery_app.main = app.name
app.logger.debug(f'CELERY_BROKER_URL: {app.config["CELERY_BROKER_URL"]}') app.logger.debug(f'CELERY_BROKER_URL: {app.config["CELERY_BROKER_URL"]}')
app.logger.debug(f'CELERY_RESULT_BACKEND: {app.config["CELERY_RESULT_BACKEND"]}') app.logger.debug(f'CELERY_RESULT_BACKEND: {app.config["CELERY_RESULT_BACKEND"]}')
celery_config = { celery_config = {
'broker_url': app.config.get('CELERY_BROKER_URL', 'redis://localhost:6379/0'), 'broker_url': app.config.get('CELERY_BROKER_URL', 'redis://localhost:6379/0'),
'result_backend': app.config.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0'), 'result_backend': app.config.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0'),
@@ -17,19 +19,40 @@ def init_celery(celery, app):
'accept_content': app.config.get('CELERY_ACCEPT_CONTENT', ['json']), 'accept_content': app.config.get('CELERY_ACCEPT_CONTENT', ['json']),
'timezone': app.config.get('CELERY_TIMEZONE', 'UTC'), 'timezone': app.config.get('CELERY_TIMEZONE', 'UTC'),
'enable_utc': app.config.get('CELERY_ENABLE_UTC', True), 'enable_utc': app.config.get('CELERY_ENABLE_UTC', True),
'task_routes': {'eveai_worker.tasks.create_embeddings': {'queue': 'embeddings',
'routing_key': 'embeddings.create_embeddings'}},
} }
if is_beat:
# Add configurations specific to Beat scheduler
celery_config['beat_scheduler'] = 'redbeat.RedBeatScheduler'
celery_config['redbeat_lock_key'] = 'redbeat::lock'
celery_config['beat_max_loop_interval'] = 10 # Adjust as needed
celery_app.conf.update(**celery_config) celery_app.conf.update(**celery_config)
# Setting up Celery task queues # Task queues for workers only
celery_app.conf.task_queues = ( if not is_beat:
Queue('default', routing_key='task.#'), celery_app.conf.task_queues = (
Queue('embeddings', routing_key='embeddings.#', queue_arguments={'x-max-priority': 10}), Queue('default', routing_key='task.#'),
Queue('llm_interactions', routing_key='llm_interactions.#', queue_arguments={'x-max-priority': 5}), Queue('embeddings', routing_key='embeddings.#', queue_arguments={'x-max-priority': 10}),
) Queue('llm_interactions', routing_key='llm_interactions.#', queue_arguments={'x-max-priority': 5}),
Queue('entitlements', routing_key='entitlements.#', queue_arguments={'x-max-priority': 10}),
)
celery_app.conf.task_routes = {
'eveai_workers.*': { # All tasks from eveai_workers module
'queue': 'embeddings',
'routing_key': 'embeddings.#',
},
'eveai_chat_workers.*': { # All tasks from eveai_chat_workers module
'queue': 'llm_interactions',
'routing_key': 'llm_interactions.#',
},
'eveai_entitlements.*': { # All tasks from eveai_entitlements module
'queue': 'entitlements',
'routing_key': 'entitlements.#',
}
}
# Ensuring tasks execute with Flask application context # Ensure tasks execute with Flask context
class ContextTask(celery.Task): class ContextTask(celery.Task):
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
with app.app_context(): with app.app_context():
@@ -37,6 +60,39 @@ def init_celery(celery, app):
celery.Task = ContextTask celery.Task = ContextTask
# Original init_celery before updating for beat
# def init_celery(celery, app):
# celery_app.main = app.name
# app.logger.debug(f'CELERY_BROKER_URL: {app.config["CELERY_BROKER_URL"]}')
# app.logger.debug(f'CELERY_RESULT_BACKEND: {app.config["CELERY_RESULT_BACKEND"]}')
# celery_config = {
# 'broker_url': app.config.get('CELERY_BROKER_URL', 'redis://localhost:6379/0'),
# 'result_backend': app.config.get('CELERY_RESULT_BACKEND', 'redis://localhost:6379/0'),
# 'task_serializer': app.config.get('CELERY_TASK_SERIALIZER', 'json'),
# 'result_serializer': app.config.get('CELERY_RESULT_SERIALIZER', 'json'),
# 'accept_content': app.config.get('CELERY_ACCEPT_CONTENT', ['json']),
# 'timezone': app.config.get('CELERY_TIMEZONE', 'UTC'),
# 'enable_utc': app.config.get('CELERY_ENABLE_UTC', True),
# 'task_routes': {'eveai_worker.tasks.create_embeddings': {'queue': 'embeddings',
# 'routing_key': 'embeddings.create_embeddings'}},
# }
# celery_app.conf.update(**celery_config)
#
# # Setting up Celery task queues
# celery_app.conf.task_queues = (
# Queue('default', routing_key='task.#'),
# Queue('embeddings', routing_key='embeddings.#', queue_arguments={'x-max-priority': 10}),
# Queue('llm_interactions', routing_key='llm_interactions.#', queue_arguments={'x-max-priority': 5}),
# )
#
# # Ensuring tasks execute with Flask application context
# class ContextTask(celery.Task):
# def __call__(self, *args, **kwargs):
# with app.app_context():
# return self.run(*args, **kwargs)
#
# celery.Task = ContextTask
def make_celery(app_name, config): def make_celery(app_name, config):
return celery_app return celery_app
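
A sketch of how the beat container might initialise Celery with the new flag. The module path, app factory and task name are assumptions; RedBeat does read the standard beat_schedule setting and persists it in Redis:

from celery.schedules import crontab
from common.utils.celery_utils import celery_app, init_celery

app = create_app()                      # assumed Flask app factory for eveai_beat
init_celery(celery_app, app, is_beat=True)

celery_app.conf.beat_schedule = {
    'calculate-license-usage-hourly': {
        'task': 'calculate_license_usage',   # assumed task name registered by eveai_entitlements
        'schedule': crontab(minute=0),       # hourly, as described in the changelog
    },
}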

View File

@@ -17,15 +17,17 @@ from ..models.user import Tenant
def create_document_stack(api_input, file, filename, extension, tenant_id): def create_document_stack(api_input, file, filename, extension, tenant_id):
# Create the Document # Create the Document
new_doc = create_document(api_input, filename, tenant_id) catalog_id = int(api_input.get('catalog_id'))
new_doc = create_document(api_input, filename, catalog_id)
db.session.add(new_doc) db.session.add(new_doc)
# Create the DocumentVersion # Create the DocumentVersion
new_doc_vers = create_version_for_document(new_doc, new_doc_vers = create_version_for_document(new_doc, tenant_id,
api_input.get('url', ''), api_input.get('url', ''),
api_input.get('language', 'en'), api_input.get('language', 'en'),
api_input.get('user_context', ''), api_input.get('user_context', ''),
api_input.get('user_metadata'), api_input.get('user_metadata'),
api_input.get('catalog_properties')
) )
db.session.add(new_doc_vers) db.session.add(new_doc_vers)
@@ -45,7 +47,7 @@ def create_document_stack(api_input, file, filename, extension, tenant_id):
return new_doc, new_doc_vers return new_doc, new_doc_vers
def create_document(form, filename, tenant_id): def create_document(form, filename, catalog_id):
new_doc = Document() new_doc = Document()
if form['name'] == '': if form['name'] == '':
new_doc.name = filename.rsplit('.', 1)[0] new_doc.name = filename.rsplit('.', 1)[0]
@@ -56,13 +58,13 @@ def create_document(form, filename, tenant_id):
new_doc.valid_from = form['valid_from'] new_doc.valid_from = form['valid_from']
else: else:
new_doc.valid_from = dt.now(tz.utc) new_doc.valid_from = dt.now(tz.utc)
new_doc.tenant_id = tenant_id new_doc.catalog_id = catalog_id
set_logging_information(new_doc, dt.now(tz.utc)) set_logging_information(new_doc, dt.now(tz.utc))
return new_doc return new_doc
def create_version_for_document(document, url, language, user_context, user_metadata): def create_version_for_document(document, tenant_id, url, language, user_context, user_metadata, catalog_properties):
new_doc_vers = DocumentVersion() new_doc_vers = DocumentVersion()
if url != '': if url != '':
new_doc_vers.url = url new_doc_vers.url = url
@@ -78,11 +80,14 @@ def create_version_for_document(document, url, language, user_context, user_meta
if user_metadata != '' and user_metadata is not None: if user_metadata != '' and user_metadata is not None:
new_doc_vers.user_metadata = user_metadata new_doc_vers.user_metadata = user_metadata
if catalog_properties != '' and catalog_properties is not None:
new_doc_vers.catalog_properties = catalog_properties
new_doc_vers.document = document new_doc_vers.document = document
set_logging_information(new_doc_vers, dt.now(tz.utc)) set_logging_information(new_doc_vers, dt.now(tz.utc))
mark_tenant_storage_dirty(document.tenant_id) mark_tenant_storage_dirty(tenant_id)
return new_doc_vers return new_doc_vers
@@ -99,12 +104,12 @@ def upload_file_for_version(doc_vers, file, extension, tenant_id):
doc_vers.doc_id, doc_vers.doc_id,
doc_vers.language, doc_vers.language,
doc_vers.id, doc_vers.id,
doc_vers.file_name, f"{doc_vers.id}.{extension}",
file file
) )
doc_vers.bucket_name = bn doc_vers.bucket_name = bn
doc_vers.object_name = on doc_vers.object_name = on
doc_vers.file_size_mb = size / 1048576 # Convert bytes to MB doc_vers.file_size = size / 1048576 # Convert bytes to MB
db.session.commit() db.session.commit()
current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for ' current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
@@ -222,10 +227,9 @@ def process_multiple_urls(urls, tenant_id, api_input):
def start_embedding_task(tenant_id, doc_vers_id): def start_embedding_task(tenant_id, doc_vers_id):
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[ task = current_celery.send_task('create_embeddings',
tenant_id, args=[tenant_id, doc_vers_id,],
doc_vers_id, queue='embeddings')
])
current_app.logger.info(f'Embedding creation started for tenant {tenant_id}, ' current_app.logger.info(f'Embedding creation started for tenant {tenant_id}, '
f'Document Version {doc_vers_id}. ' f'Document Version {doc_vers_id}. '
f'Embedding creation task: {task.id}') f'Embedding creation task: {task.id}')
@@ -273,9 +277,10 @@ def edit_document(document_id, name, valid_from, valid_to):
return None, str(e) return None, str(e)
def edit_document_version(version_id, user_context): def edit_document_version(version_id, user_context, catalog_properties):
doc_vers = DocumentVersion.query.get_or_404(version_id) doc_vers = DocumentVersion.query.get_or_404(version_id)
doc_vers.user_context = user_context doc_vers.user_context = user_context
doc_vers.catalog_properties = catalog_properties
update_logging_information(doc_vers, dt.now(tz.utc)) update_logging_information(doc_vers, dt.now(tz.utc))
try: try:
@@ -287,7 +292,7 @@ def edit_document_version(version_id, user_context):
return None, str(e) return None, str(e)
def refresh_document_with_info(doc_id, api_input): def refresh_document_with_info(doc_id, tenant_id, api_input):
doc = Document.query.get_or_404(doc_id) doc = Document.query.get_or_404(doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first() old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
@@ -295,11 +300,12 @@ def refresh_document_with_info(doc_id, api_input):
return None, "This document has no URL. Only documents with a URL can be refreshed." return None, "This document has no URL. Only documents with a URL can be refreshed."
new_doc_vers = create_version_for_document( new_doc_vers = create_version_for_document(
doc, doc, tenant_id,
old_doc_vers.url, old_doc_vers.url,
api_input.get('language', old_doc_vers.language), api_input.get('language', old_doc_vers.language),
api_input.get('user_context', old_doc_vers.user_context), api_input.get('user_context', old_doc_vers.user_context),
api_input.get('user_metadata', old_doc_vers.user_metadata) api_input.get('user_metadata', old_doc_vers.user_metadata),
api_input.get('catalog_properties', old_doc_vers.catalog_properties),
) )
set_logging_information(new_doc_vers, dt.now(tz.utc)) set_logging_information(new_doc_vers, dt.now(tz.utc))
@@ -319,32 +325,35 @@ def refresh_document_with_info(doc_id, api_input):
response.raise_for_status() response.raise_for_status()
file_content = response.content file_content = response.content
upload_file_for_version(new_doc_vers, file_content, extension, doc.tenant_id) upload_file_for_version(new_doc_vers, file_content, extension, tenant_id)
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[ task = current_celery.send_task('create_embeddings', args=[tenant_id, new_doc_vers.id,], queue='embeddings')
doc.tenant_id, current_app.logger.info(f'Embedding creation started for document {doc_id} on version {new_doc_vers.id} '
new_doc_vers.id, f'with task id: {task.id}.')
])
return new_doc_vers, task.id return new_doc_vers, task.id
# Update the existing refresh_document function to use the new refresh_document_with_info # Update the existing refresh_document function to use the new refresh_document_with_info
def refresh_document(doc_id): def refresh_document(doc_id, tenant_id):
current_app.logger.info(f'Refreshing document {doc_id}')
doc = Document.query.get_or_404(doc_id) doc = Document.query.get_or_404(doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first() old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
api_input = { api_input = {
'language': old_doc_vers.language, 'language': old_doc_vers.language,
'user_context': old_doc_vers.user_context, 'user_context': old_doc_vers.user_context,
'user_metadata': old_doc_vers.user_metadata 'user_metadata': old_doc_vers.user_metadata,
'catalog_properties': old_doc_vers.catalog_properties,
} }
return refresh_document_with_info(doc_id, api_input) return refresh_document_with_info(doc_id, tenant_id, api_input)
# Function triggered when a document_version is created or updated # Function triggered when a document_version is created or updated
def mark_tenant_storage_dirty(tenant_id): def mark_tenant_storage_dirty(tenant_id):
tenant = db.session.query(Tenant).filter_by(id=tenant_id).first() tenant = db.session.query(Tenant).filter_by(id=int(tenant_id)).first()
tenant.storage_dirty = True tenant.storage_dirty = True
db.session.commit() db.session.commit()
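
A sketch of the adjusted call sites: create_document_stack now expects a catalog_id (and optional catalog_properties) in its input, and refresh_document needs the tenant passed in explicitly. The ids and file variables are placeholders:

api_input = {
    'name': 'Employee handbook',
    'catalog_id': str(catalog.id),            # parsed with int() inside create_document_stack
    'valid_from': None,
    'url': '',
    'language': 'en',
    'user_context': '',
    'user_metadata': None,
    'catalog_properties': {'dossier_id': 42}, # stored on the DocumentVersion when the Catalog defines fields
}
doc, doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)

# Only documents backed by a URL can be refreshed.
new_vers, task_id = refresh_document(doc.id, tenant_id)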


@@ -54,9 +54,7 @@ class MinioClient:
except S3Error as err: except S3Error as err:
raise Exception(f"Error occurred while uploading file: {err}") raise Exception(f"Error occurred while uploading file: {err}")
def download_document_file(self, tenant_id, document_id, language, version_id, filename): def download_document_file(self, tenant_id, bucket_name, object_name):
bucket_name = self.generate_bucket_name(tenant_id)
object_name = self.generate_object_name(document_id, language, version_id, filename)
try: try:
response = self.client.get_object(bucket_name, object_name) response = self.client.get_object(bucket_name, object_name)
return response.read() return response.read()
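
A sketch of the new download call, which now reuses the bucket and object names stored on the DocumentVersion instead of recomputing them; minio_client is an assumed MinioClient instance:

content = minio_client.download_document_file(
    tenant_id,
    doc_vers.bucket_name,
    doc_vers.object_name,
)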


@@ -14,7 +14,7 @@ from portkey_ai.langchain.portkey_langchain_callback_handler import LangchainCal
from common.langchain.llm_metrics_handler import LLMMetricsHandler from common.langchain.llm_metrics_handler import LLMMetricsHandler
from common.langchain.tracked_openai_embeddings import TrackedOpenAIEmbeddings from common.langchain.tracked_openai_embeddings import TrackedOpenAIEmbeddings
from common.langchain.tracked_transcribe import tracked_transcribe from common.langchain.tracked_transcribe import tracked_transcribe
from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI, Catalog
from common.models.user import Tenant from common.models.user import Tenant
from config.model_config import MODEL_CONFIG from config.model_config import MODEL_CONFIG
from common.utils.business_event_context import current_event from common.utils.business_event_context import current_event
@@ -42,8 +42,9 @@ def set_language_prompt_template(cls, language_prompt):
class ModelVariables(MutableMapping): class ModelVariables(MutableMapping):
def __init__(self, tenant: Tenant): def __init__(self, tenant: Tenant, catalog_id=None):
self.tenant = tenant self.tenant = tenant
self.catalog_id = catalog_id
self._variables = self._initialize_variables() self._variables = self._initialize_variables()
self._embedding_model = None self._embedding_model = None
self._llm = None self._llm = None
@@ -57,25 +58,32 @@ class ModelVariables(MutableMapping):
def _initialize_variables(self): def _initialize_variables(self):
variables = {} variables = {}
# We initialize the variables that are available knowing the tenant. For the other, we will apply 'lazy loading' # Get the Catalog if catalog_id is passed
variables['k'] = self.tenant.es_k or 5 if self.catalog_id:
variables['similarity_threshold'] = self.tenant.es_similarity_threshold or 0.7 catalog = Catalog.query.get_or_404(self.catalog_id)
variables['RAG_temperature'] = self.tenant.chat_RAG_temperature or 0.3
variables['no_RAG_temperature'] = self.tenant.chat_no_RAG_temperature or 0.5 # We initialize the variables that are available knowing the tenant.
variables['embed_tuning'] = self.tenant.embed_tuning or False variables['embed_tuning'] = catalog.embed_tuning or False
variables['rag_tuning'] = self.tenant.rag_tuning or False
# Set HTML Chunking Variables
variables['html_tags'] = catalog.html_tags
variables['html_end_tags'] = catalog.html_end_tags
variables['html_included_elements'] = catalog.html_included_elements
variables['html_excluded_elements'] = catalog.html_excluded_elements
variables['html_excluded_classes'] = catalog.html_excluded_classes
# Set Chunk Size variables
variables['min_chunk_size'] = catalog.min_chunk_size
variables['max_chunk_size'] = catalog.max_chunk_size
# Set the RAG Context (will have to change once specialists are defined
variables['rag_context'] = self.tenant.rag_context or " " variables['rag_context'] = self.tenant.rag_context or " "
# Temporary setting until we have Specialists
# Set HTML Chunking Variables variables['rag_tuning'] = False
variables['html_tags'] = self.tenant.html_tags variables['RAG_temperature'] = 0.3
variables['html_end_tags'] = self.tenant.html_end_tags variables['no_RAG_temperature'] = 0.5
variables['html_included_elements'] = self.tenant.html_included_elements variables['k'] = 8
variables['html_excluded_elements'] = self.tenant.html_excluded_elements variables['similarity_threshold'] = 0.4
variables['html_excluded_classes'] = self.tenant.html_excluded_classes
# Set Chunk Size variables
variables['min_chunk_size'] = self.tenant.min_chunk_size
variables['max_chunk_size'] = self.tenant.max_chunk_size
# Set model providers # Set model providers
variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1) variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1)
@@ -195,7 +203,12 @@ class ModelVariables(MutableMapping):
return self.transcription_client return self.transcription_client
elif key in self._variables.get('prompt_templates', []): elif key in self._variables.get('prompt_templates', []):
return self.get_prompt_template(key) return self.get_prompt_template(key)
return self._variables.get(key) else:
value = self._variables.get(key)
if value is not None:
return value
else:
raise KeyError(f'Variable {key} does not exist in ModelVariables')
def __setitem__(self, key: str, value: Any) -> None: def __setitem__(self, key: str, value: Any) -> None:
self._variables[key] = value self._variables[key] = value
@@ -225,8 +238,8 @@ class ModelVariables(MutableMapping):
return self._variables.values() return self._variables.values()
def select_model_variables(tenant): def select_model_variables(tenant, catalog_id=None):
model_variables = ModelVariables(tenant=tenant) model_variables = ModelVariables(tenant=tenant, catalog_id=catalog_id)
return model_variables return model_variables
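
A sketch of the catalog-aware lookup; note that unknown keys now raise KeyError instead of silently returning None:

model_variables = select_model_variables(tenant, catalog_id=doc.catalog_id)

min_chunk = model_variables['min_chunk_size']   # read from the Catalog row
k = model_variables['k']                        # temporary hard-coded default (8) until Specialists exist

try:
    model_variables['does_not_exist']
except KeyError as exc:
    current_app.logger.warning(exc)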


@@ -6,7 +6,6 @@ def prefixed_url_for(endpoint, **values):
prefix = request.headers.get('X-Forwarded-Prefix', '') prefix = request.headers.get('X-Forwarded-Prefix', '')
scheme = request.headers.get('X-Forwarded-Proto', request.scheme) scheme = request.headers.get('X-Forwarded-Proto', request.scheme)
host = request.headers.get('Host', request.host) host = request.headers.get('Host', request.host)
current_app.logger.debug(f'prefix: {prefix}, scheme: {scheme}, host: {host}')
external = values.pop('_external', False) external = values.pop('_external', False)
generated_url = url_for(endpoint, **values) generated_url = url_for(endpoint, **values)

config/catalog_types.py Normal file

@@ -0,0 +1,53 @@
# Catalog Types
CATALOG_TYPES = {
"DEFAULT": {
"name": "Default Catalog",
"Description": "A Catalog with information in Evie's Library, to be considered as a whole",
"configuration": {}
},
"DOSSIER": {
"name": "Dossier Catalog",
"Description": "A Catalog with information in Evie's Library in which several Dossiers can be stored",
"configuration": {
"tagging_fields": {
"name": "Tagging Fields",
"type": "tagging_fields",
"description": """Define the metadata fields that will be used for tagging documents.
Each field must have:
- type: one of 'string', 'integer', 'float', 'date', 'enum'
- required: boolean indicating if the field is mandatory
- description: field description
- allowed_values: list of values (for enum type only)
- min_value/max_value: range limits (for numeric types only)""",
"required": True,
"default": {},
"field_properties": {
"type": {
"allowed_values": ["string", "integer", "float", "date", "enum"],
"required": True
},
"required": {
"type": "boolean",
"default": False
},
"description": {
"type": "string"
},
"allowed_values": {
"type": "list",
"description": "For enum type fields only"
},
"min_value": {
"type": "number",
"description": "For numeric fields only"
},
"max_value": {
"type": "number",
"description": "For numeric fields only"
}
}
}
},
"document_version_configurations": ["tagging_fields"]
},
}
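
A sketch of how a form builder can read this registry to drive dynamic fields; the print statements only illustrate the shape of the data:

from config.catalog_types import CATALOG_TYPES

tagging = CATALOG_TYPES['DOSSIER']['configuration']['tagging_fields']
print(tagging['field_properties']['type']['allowed_values'])
# ['string', 'integer', 'float', 'date', 'enum']

print(CATALOG_TYPES['DOSSIER']['document_version_configurations'])
# ['tagging_fields']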


@@ -37,7 +37,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_app.log', 'filename': 'logs/eveai_app.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -45,7 +45,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_workers.log', 'filename': 'logs/eveai_workers.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -53,7 +53,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_chat.log', 'filename': 'logs/eveai_chat.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -61,7 +61,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_chat_workers.log', 'filename': 'logs/eveai_chat_workers.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -69,7 +69,23 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_api.log', 'filename': 'logs/eveai_api.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
'file_beat': {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_beat.log',
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10,
'formatter': 'standard',
},
'file_entitlements': {
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/eveai_entitlements.log',
'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -77,7 +93,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/sqlalchemy.log', 'filename': 'logs/sqlalchemy.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -85,7 +101,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/mailman.log', 'filename': 'logs/mailman.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -93,7 +109,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/security.log', 'filename': 'logs/security.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -101,7 +117,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/rag_tuning.log', 'filename': 'logs/rag_tuning.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -109,7 +125,7 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/embed_tuning.log', 'filename': 'logs/embed_tuning.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -117,7 +133,7 @@ LOGGING = {
'level': 'INFO', 'level': 'INFO',
'class': 'logging.handlers.RotatingFileHandler', 'class': 'logging.handlers.RotatingFileHandler',
'filename': 'logs/business_events.log', 'filename': 'logs/business_events.log',
'maxBytes': 1024 * 1024 * 5, # 5MB 'maxBytes': 1024 * 1024 * 1, # 1MB
'backupCount': 10, 'backupCount': 10,
'formatter': 'standard', 'formatter': 'standard',
}, },
@@ -172,6 +188,16 @@ LOGGING = {
'level': 'DEBUG', 'level': 'DEBUG',
'propagate': False 'propagate': False
}, },
'eveai_beat': { # logger for the eveai_beat
'handlers': ['file_beat', 'graylog', ] if env == 'production' else ['file_beat', ],
'level': 'DEBUG',
'propagate': False
},
'eveai_entitlements': { # logger for the eveai_entitlements
'handlers': ['file_entitlements', 'graylog', ] if env == 'production' else ['file_entitlements', ],
'level': 'DEBUG',
'propagate': False
},
'sqlalchemy.engine': { # logger for the sqlalchemy 'sqlalchemy.engine': { # logger for the sqlalchemy
'handlers': ['file_sqlalchemy', 'graylog', ] if env == 'production' else ['file_sqlalchemy', ], 'handlers': ['file_sqlalchemy', 'graylog', ] if env == 'production' else ['file_sqlalchemy', ],
'level': 'DEBUG', 'level': 'DEBUG',

config/retriever_types.py Normal file

@@ -0,0 +1,23 @@
# Retriever Types
RETRIEVER_TYPES = {
"DEFAULT_RAG": {
"name": "Default RAG",
"description": "Retrieving all embeddings conform the query",
"configuration": {
"es_k": {
"name": "es_k",
"type": "int",
"description": "K-value to retrieve embeddings (max embeddings retrieved)",
"required": True,
"default": 8,
},
"es_similarity_threshold": {
"name": "es_similarity_threshold",
"type": "float",
"description": "Similarity threshold for retrieving embeddings",
"required": True,
"default": 0.3,
},
}
}
}
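
A sketch of deriving a Retriever's effective configuration from the type defaults, with per-retriever overrides layered on top (the merge itself is illustrative, not an existing helper):

from config.retriever_types import RETRIEVER_TYPES

defaults = {key: spec['default']
            for key, spec in RETRIEVER_TYPES['DEFAULT_RAG']['configuration'].items()}
effective = {**defaults, **(retriever_row.configuration or {})}
# -> {'es_k': 8, 'es_similarity_threshold': 0.3} unless overridden on the Retriever row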


@@ -169,4 +169,5 @@ for SERVICE in "${SERVICES[@]}"; do
fi fi
done done
echo "All specified services processed." echo -e "\033[35mAll specified services processed.\033[0m"
echo -e "\033[35mFinished at $(date +"%d/%m/%Y %H:%M:%S")\033[0m"


@@ -231,6 +231,59 @@ services:
networks: networks:
- eveai-network - eveai-network
eveai_beat:
image: josakola/eveai_beat:latest
build:
context: ..
dockerfile: ./docker/eveai_beat/Dockerfile
platforms:
- linux/amd64
- linux/arm64
environment:
<<: *common-variables
COMPONENT_NAME: eveai_beat
volumes:
- ../eveai_beat:/app/eveai_beat
- ../common:/app/common
- ../config:/app/config
- ../scripts:/app/scripts
- ../patched_packages:/app/patched_packages
- eveai_logs:/app/logs
depends_on:
redis:
condition: service_healthy
networks:
- eveai-network
eveai_entitlements:
image: josakola/eveai_entitlements:latest
build:
context: ..
dockerfile: ./docker/eveai_entitlements/Dockerfile
platforms:
- linux/amd64
- linux/arm64
environment:
<<: *common-variables
COMPONENT_NAME: eveai_entitlements
volumes:
- ../eveai_entitlements:/app/eveai_entitlements
- ../common:/app/common
- ../config:/app/config
- ../scripts:/app/scripts
- ../patched_packages:/app/patched_packages
- eveai_logs:/app/logs
depends_on:
db:
condition: service_healthy
redis:
condition: service_healthy
minio:
condition: service_healthy
networks:
- eveai-network
db: db:
hostname: db hostname: db
image: ankane/pgvector image: ankane/pgvector


@@ -145,6 +145,28 @@ services:
networks: networks:
- eveai-network - eveai-network
eveai_beat:
platform: linux/amd64
image: josakola/eveai_beat:latest
environment:
<<: *common-variables
COMPONENT_NAME: eveai_beat
volumes:
- eveai_logs:/app/logs
networks:
- eveai-network
eveai_entitlements:
platform: linux/amd64
image: josakola/eveai_entitlements:latest
environment:
<<: *common-variables
COMPONENT_NAME: eveai_entitlements
volumes:
- eveai_logs:/app/logs
networks:
- eveai-network
flower: flower:
image: josakola/flower:latest image: josakola/flower:latest
environment: environment:

View File

@@ -0,0 +1,65 @@
ARG PYTHON_VERSION=3.12.3
FROM python:${PYTHON_VERSION}-slim as base
# Prevents Python from writing pyc files.
ENV PYTHONDONTWRITEBYTECODE=1
# Keeps Python from buffering stdout and stderr to avoid situations where
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1
# Create directory for patched packages and set permissions
RUN mkdir -p /app/patched_packages && \
chmod 777 /app/patched_packages
# Ensure patches are applied to the application.
ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH
WORKDIR /app
# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/go/dockerfile-user-best-practices/
ARG UID=10001
RUN adduser \
--disabled-password \
--gecos "" \
--home "/nonexistent" \
--shell "/bin/bash" \
--no-create-home \
--uid "${UID}" \
appuser
# Install necessary packages and build tools
#RUN apt-get update && apt-get install -y \
# build-essential \
# gcc \
# && apt-get clean \
# && rm -rf /var/lib/apt/lists/*
# Create logs directory and set permissions
RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs
# Install Python dependencies.
# Download dependencies as a separate step to take advantage of Docker's caching.
# requirements.txt is copied on its own so this layer is only rebuilt when the
# dependency list changes.
COPY requirements.txt /app/
RUN python -m pip install -r /app/requirements.txt
# Copy the source code into the container.
COPY eveai_beat /app/eveai_beat
COPY common /app/common
COPY config /app/config
COPY scripts /app/scripts
COPY patched_packages /app/patched_packages
COPY --chown=root:root scripts/entrypoint_no_db.sh /app/scripts/
# Set ownership of the application directory to the non-privileged user
RUN chown -R appuser:appuser /app
# Set entrypoint and command
ENTRYPOINT ["/app/scripts/entrypoint_no_db.sh"]
CMD ["/app/scripts/start_eveai_beat.sh"]

View File

@@ -0,0 +1,69 @@
ARG PYTHON_VERSION=3.12.3
FROM python:${PYTHON_VERSION}-slim as base
# Prevents Python from writing pyc files.
ENV PYTHONDONTWRITEBYTECODE=1
# Keeps Python from buffering stdout and stderr to avoid situations where
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1
# Create directory for patched packages and set permissions
RUN mkdir -p /app/patched_packages && \
chmod 777 /app/patched_packages
# Ensure patches are applied to the application.
ENV PYTHONPATH=/app/patched_packages:$PYTHONPATH
WORKDIR /app
# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/go/dockerfile-user-best-practices/
ARG UID=10001
RUN adduser \
--disabled-password \
--gecos "" \
--home "/nonexistent" \
--shell "/bin/bash" \
--no-create-home \
--uid "${UID}" \
appuser
# Install necessary packages and build tools
RUN apt-get update && apt-get install -y \
build-essential \
gcc \
postgresql-client \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Create logs directory and set permissions
RUN mkdir -p /app/logs && chown -R appuser:appuser /app/logs
# Install Python dependencies.
# Download dependencies as a separate step to take advantage of Docker's caching.
# requirements.txt is copied on its own so this layer is only rebuilt when the
# dependency list changes.
COPY requirements.txt /app/
RUN python -m pip install -r /app/requirements.txt
# Copy the source code into the container.
COPY eveai_entitlements /app/eveai_entitlements
COPY common /app/common
COPY config /app/config
COPY scripts /app/scripts
COPY patched_packages /app/patched_packages
COPY --chown=root:root scripts/entrypoint.sh /app/scripts/
# Set permissions for entrypoint script
RUN chmod 777 /app/scripts/entrypoint.sh
# Set ownership of the application directory to the non-privileged user
RUN chown -R appuser:appuser /app
# Set entrypoint and command
ENTRYPOINT ["/app/scripts/entrypoint.sh"]
CMD ["/app/scripts/start_eveai_entitlements.sh"]

62
docker/release_and_tag_eveai.sh Executable file
View File

@@ -0,0 +1,62 @@
#!/bin/bash
# Initialize variables
RELEASE_VERSION=""
RELEASE_MESSAGE=""
DOCKER_ACCOUNT="josakola" # Your Docker account name
# Parse input arguments
while getopts r:m: flag
do
case "${flag}" in
r) RELEASE_VERSION=${OPTARG};;
m) RELEASE_MESSAGE=${OPTARG};;
*)
echo "Usage: $0 -r <release_version> -m <release_message>"
exit 1 ;;
esac
done
# Ensure both version and message are provided
if [ -z "$RELEASE_VERSION" ]; then
echo "Error: Release version not provided. Use -r <release_version>"
exit 1
fi
if [ -z "$RELEASE_MESSAGE" ]; then
echo "Error: Release message not provided. Use -m <release_message>"
exit 1
fi
# Path to your docker-compose file
DOCKER_COMPOSE_FILE="compose_dev.yaml"
# Get all the images defined in docker-compose
IMAGES=$(docker compose -f $DOCKER_COMPOSE_FILE config | grep 'image:' | awk '{ print $2 }')
# Start tagging only relevant images
for DOCKER_IMAGE in $IMAGES; do
# Check if the image belongs to your Docker account and ends with :latest
if [[ $DOCKER_IMAGE == $DOCKER_ACCOUNT* && $DOCKER_IMAGE == *:latest ]]; then
# Remove the ":latest" tag to use the base image name
BASE_IMAGE=${DOCKER_IMAGE%:latest}
echo "Tagging Docker image: $BASE_IMAGE with version: $RELEASE_VERSION"
# Tag the 'latest' image with the new release version
docker tag $DOCKER_IMAGE $BASE_IMAGE:$RELEASE_VERSION
# Push the newly tagged image to Docker Hub
docker push $BASE_IMAGE:$RELEASE_VERSION
else
echo "Skipping image: $DOCKER_IMAGE (not part of $DOCKER_ACCOUNT or not tagged as latest)"
fi
done
# Step 3: Tag the Git repository with the release version
echo "Tagging Git repository with version: $RELEASE_VERSION"
git tag -a v$RELEASE_VERSION -m "Release $RELEASE_VERSION: $RELEASE_MESSAGE"
git push origin v$RELEASE_VERSION
echo -e "\033[35mRelease process completed for version: $RELEASE_VERSION \033[0m"
echo -e "\033[35mFinished at $(date +"%d/%m/%Y %H:%M:%S")\033[0m"

View File

@@ -56,13 +56,6 @@ def create_app(config_file=None):
app.logger.debug(f'Request URL: {request.url}') app.logger.debug(f'Request URL: {request.url}')
app.logger.debug(f'Request headers: {dict(request.headers)}') app.logger.debug(f'Request headers: {dict(request.headers)}')
# Log request arguments
app.logger.debug(f'Request args: {request.args}')
# Log form data if it's a POST request
if request.method == 'POST':
app.logger.debug(f'Form data: {request.form}')
# Log JSON data if the content type is application/json # Log JSON data if the content type is application/json
if request.is_json: if request.is_json:
app.logger.debug(f'JSON data: {request.json}') app.logger.debug(f'JSON data: {request.json}')
@@ -95,6 +88,10 @@ def create_app(config_file=None):
# Don't raise the exception here, let the request continue # Don't raise the exception here, let the request continue
# The appropriate error handling will be done in the specific endpoints # The appropriate error handling will be done in the specific endpoints
@app.route('/api/v1')
def swagger():
return api_rest.render_doc()
return app return app

View File

@@ -33,6 +33,7 @@ document_ns = Namespace('documents', description='Document related operations')
# Define models for request parsing and response serialization # Define models for request parsing and response serialization
upload_parser = reqparse.RequestParser() upload_parser = reqparse.RequestParser()
upload_parser.add_argument('catalog_id', location='form', type=int, required=True, help='The catalog to add the file to')
upload_parser.add_argument('file', location='files', type=FileStorage, required=True, help='The file to upload') upload_parser.add_argument('file', location='files', type=FileStorage, required=True, help='The file to upload')
upload_parser.add_argument('name', location='form', type=str, required=False, help='Name of the document') upload_parser.add_argument('name', location='form', type=str, required=False, help='Name of the document')
upload_parser.add_argument('language', location='form', type=str, required=True, help='Language of the document') upload_parser.add_argument('language', location='form', type=str, required=True, help='Language of the document')
@@ -42,6 +43,9 @@ upload_parser.add_argument('valid_from', location='form', type=validate_date, re
help='Valid from date for the document (ISO format)') help='Valid from date for the document (ISO format)')
upload_parser.add_argument('user_metadata', location='form', type=validate_json, required=False, upload_parser.add_argument('user_metadata', location='form', type=validate_json, required=False,
help='User metadata for the document (JSON format)') help='User metadata for the document (JSON format)')
upload_parser.add_argument('catalog_properties', location='form', type=validate_json, required=False,
help='The catalog configuration to be passed along (JSON format). Validity against catalog requirements '
'is not checked, and is the responsibility of the calling client.')
add_document_response = document_ns.model('AddDocumentResponse', { add_document_response = document_ns.model('AddDocumentResponse', {
'message': fields.String(description='Status message'), 'message': fields.String(description='Status message'),
@@ -75,11 +79,13 @@ class AddDocument(Resource):
validate_file_type(extension) validate_file_type(extension)
api_input = { api_input = {
'catalog_id': args.get('catalog_id'),
'name': args.get('name') or filename, 'name': args.get('name') or filename,
'language': args.get('language'), 'language': args.get('language'),
'user_context': args.get('user_context'), 'user_context': args.get('user_context'),
'valid_from': args.get('valid_from'), 'valid_from': args.get('valid_from'),
'user_metadata': args.get('user_metadata'), 'user_metadata': args.get('user_metadata'),
'catalog_properties': args.get('catalog_properties'),
} }
new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id) new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
@@ -102,13 +108,18 @@ class AddDocument(Resource):
# Models for AddURL # Models for AddURL
add_url_model = document_ns.model('AddURL', { add_url_model = document_ns.model('AddURL', {
'catalog_id': fields.Integer(required=True, description='ID of the catalog the URL needs to be added to'),
'url': fields.String(required=True, description='URL of the document to add'), 'url': fields.String(required=True, description='URL of the document to add'),
'name': fields.String(required=False, description='Name of the document'), 'name': fields.String(required=False, description='Name of the document'),
'language': fields.String(required=True, description='Language of the document'), 'language': fields.String(required=True, description='Language of the document'),
'user_context': fields.String(required=False, description='User context for the document'), 'user_context': fields.String(required=False, description='User context for the document'),
'valid_from': fields.String(required=False, description='Valid from date for the document'), 'valid_from': fields.String(required=False, description='Valid from date for the document'),
'user_metadata': fields.String(required=False, description='User metadata for the document'), 'user_metadata': fields.String(required=False, description='User metadata for the document'),
'system_metadata': fields.String(required=False, description='System metadata for the document') 'system_metadata': fields.String(required=False, description='System metadata for the document'),
'catalog_properties': fields.String(required=False, description='The catalog configuration to be passed along (JSON '
'format). Validity against catalog requirements '
'is not checked, and is the responsibility of the '
'calling client.'),
}) })
add_url_response = document_ns.model('AddURLResponse', { add_url_response = document_ns.model('AddURLResponse', {
@@ -138,12 +149,14 @@ class AddURL(Resource):
file_content, filename, extension = process_url(args['url'], tenant_id) file_content, filename, extension = process_url(args['url'], tenant_id)
api_input = { api_input = {
'catalog_id': args['catalog_id'],
'url': args['url'], 'url': args['url'],
'name': args.get('name') or filename, 'name': args.get('name') or filename,
'language': args['language'], 'language': args['language'],
'user_context': args.get('user_context'), 'user_context': args.get('user_context'),
'valid_from': args.get('valid_from'), 'valid_from': args.get('valid_from'),
'user_metadata': args.get('user_metadata'), 'user_metadata': args.get('user_metadata'),
'catalog_properties': args.get('catalog_properties'),
} }
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id) new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
@@ -213,7 +226,8 @@ class DocumentResource(Resource):
@document_ns.response(200, 'Document refreshed successfully') @document_ns.response(200, 'Document refreshed successfully')
def post(self, document_id): def post(self, document_id):
"""Refresh a document""" """Refresh a document"""
new_version, result = refresh_document(document_id) tenant_id = get_jwt_identity()
new_version, result = refresh_document(document_id, tenant_id)
if new_version: if new_version:
return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200 return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200
else: else:
@@ -222,6 +236,7 @@ class DocumentResource(Resource):
edit_document_version_model = document_ns.model('EditDocumentVersion', { edit_document_version_model = document_ns.model('EditDocumentVersion', {
'user_context': fields.String(required=True, description='New user context for the document version'), 'user_context': fields.String(required=True, description='New user context for the document version'),
'catalog_properties': fields.String(required=True, description='New catalog properties for the document version'),
}) })
@@ -234,7 +249,7 @@ class DocumentVersionResource(Resource):
def put(self, version_id): def put(self, version_id):
"""Edit a document version""" """Edit a document version"""
data = request.json data = request.json
updated_version, error = edit_document_version(version_id, data['user_context']) updated_version, error = edit_document_version(version_id, data['user_context'], data.get('catalog_properties'))
if updated_version: if updated_version:
return {'message': f'Document Version {updated_version.id} updated successfully'}, 200 return {'message': f'Document Version {updated_version.id} updated successfully'}, 200
else: else:
@@ -246,7 +261,8 @@ refresh_document_model = document_ns.model('RefreshDocument', {
'name': fields.String(required=False, description='New name for the document'), 'name': fields.String(required=False, description='New name for the document'),
'language': fields.String(required=False, description='Language of the document'), 'language': fields.String(required=False, description='Language of the document'),
'user_context': fields.String(required=False, description='User context for the document'), 'user_context': fields.String(required=False, description='User context for the document'),
'user_metadata': fields.Raw(required=False, description='User metadata for the document') 'user_metadata': fields.Raw(required=False, description='User metadata for the document'),
'catalog_properties': fields.Raw(required=False, description='Catalog properties for the document'),
}) })
@@ -263,7 +279,7 @@ class RefreshDocument(Resource):
current_app.logger.info(f'Refreshing document {document_id} for tenant {tenant_id}') current_app.logger.info(f'Refreshing document {document_id} for tenant {tenant_id}')
try: try:
new_version, result = refresh_document(document_id) new_version, result = refresh_document(document_id, tenant_id)
if new_version: if new_version:
return { return {
@@ -296,7 +312,7 @@ class RefreshDocumentWithInfo(Resource):
try: try:
api_input = request.json api_input = request.json
new_version, result = refresh_document_with_info(document_id, api_input) new_version, result = refresh_document_with_info(document_id, tenant_id, api_input)
if new_version: if new_version:
return { return {

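A hedged example of calling the extended upload endpoint as a client; the form field names come from the upload_parser above, while the base URL, exact path and bearer-token auth are assumptions:

import json
import requests

resp = requests.post(
    'https://eveai.example.com/api/v1/documents',        # assumed endpoint path
    headers={'Authorization': 'Bearer <tenant-jwt>'},    # assumed auth scheme
    data={
        'catalog_id': 3,
        'language': 'en',
        'name': 'pricing.pdf',
        # Passed through as-is; validity against the catalog requirements is the client's responsibility.
        'catalog_properties': json.dumps({'department': 'sales'}),
    },
    files={'file': open('pricing.pdf', 'rb')},
)
print(resp.status_code, resp.json())
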
View File

@@ -46,7 +46,7 @@ def check_database():
def check_celery(): def check_celery():
try: try:
# Send a simple task to Celery # Send a simple task to Celery
result = current_celery.send_task('tasks.ping', queue='embeddings') result = current_celery.send_task('ping', queue='eveai_workers.ping')
response = result.get(timeout=10) # Wait for up to 10 seconds for a response response = result.get(timeout=10) # Wait for up to 10 seconds for a response
return response == 'pong' return response == 'pong'
except CeleryTimeoutError: except CeleryTimeoutError:

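The health check now only assumes that a task registered under the plain name 'ping' answers on the 'eveai_workers.ping' queue. A minimal sketch of the worker-side counterpart (module location and decorator usage are assumptions):

from celery import shared_task

@shared_task(name='ping')
def ping():
    # The eveai_workers instance must consume the 'eveai_workers.ping' queue
    # (e.g. started with -Q eveai_workers.ping) for the health check to get 'pong' back.
    return 'pong'
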
View File

@@ -136,6 +136,8 @@ def register_blueprints(app):
app.register_blueprint(interaction_bp) app.register_blueprint(interaction_bp)
from .views.entitlements_views import entitlements_bp from .views.entitlements_views import entitlements_bp
app.register_blueprint(entitlements_bp) app.register_blueprint(entitlements_bp)
from .views.administration_views import administration_bp
app.register_blueprint(administration_bp)
from .views.healthz_views import healthz_bp, init_healtz from .views.healthz_views import healthz_bp, init_healtz
app.register_blueprint(healthz_bp) app.register_blueprint(healthz_bp)
init_healtz(app) init_healtz(app)

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,22 @@
{% extends 'base.html' %}
{% from "macros.html" import render_selectable_table, render_pagination, render_field %}
{% block title %}Trigger Actions{% endblock %}
{% block content_title %}Trigger Actions{% endblock %}
{% block content_description %}Manually trigger batch actions{% endblock %}
{% block content %}
<!-- Trigger action Form -->
<form method="POST" action="{{ url_for('administration_bp.handle_trigger_action') }}">
<div class="form-group mt-3">
<button type="submit" name="action" value="update_usages" class="btn btn-secondary">Update Usages</button>
</div>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}
{% block scripts %}
{% endblock %}

View File

@@ -11,9 +11,17 @@
{{ form.hidden_tag() }} {{ form.hidden_tag() }}
{% set disabled_fields = [] %} {% set disabled_fields = [] %}
{% set exclude_fields = [] %} {% set exclude_fields = [] %}
{% for field in form %} {% for field in form.get_static_fields() %}
{{ render_field(field, disabled_fields, exclude_fields) }} {{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %} {% endfor %}
{% for collection_name, fields in form.get_dynamic_fields().items() %}
{% if fields|length > 0 %}
<h4 class="mt-4">{{ collection_name }}</h4>
{% endif %}
{% for field in fields %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% endfor %}
<button type="submit" class="btn btn-primary">Add Document</button> <button type="submit" class="btn btn-primary">Add Document</button>
</form> </form>
{% endblock %} {% endblock %}

View File

@@ -0,0 +1,23 @@
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{% block title %}Catalog Registration{% endblock %}
{% block content_title %}Register Catalog{% endblock %}
{% block content_description %}Define a new catalog of documents in Evie's Library{% endblock %}
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{% for field in form %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<button type="submit" class="btn btn-primary">Register Catalog</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -0,0 +1,24 @@
{% extends 'base.html' %}
{% from 'macros.html' import render_selectable_table, render_pagination %}
{% block title %}Catalogs{% endblock %}
{% block content_title %}Catalogs{% endblock %}
{% block content_description %}View Catalogs for Tenant{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %}
<div class="container">
<form method="POST" action="{{ url_for('document_bp.handle_catalog_selection') }}">
{{ render_selectable_table(headers=["Catalog ID", "Name"], rows=rows, selectable=True, id="catalogsTable") }}
<div class="form-group mt-3">
<button type="submit" name="action" value="set_session_catalog" class="btn btn-primary">Set Session Catalog</button>
<button type="submit" name="action" value="edit_catalog" class="btn btn-primary">Edit Catalog</button>
</div>
</form>
</div>
{% endblock %}
{% block content_footer %}
{{ render_pagination(pagination, 'document_bp.catalogs') }}
{% endblock %}

View File

@@ -10,7 +10,7 @@
{% block content %} {% block content %}
<div class="container"> <div class="container">
<form method="POST" action="{{ url_for('document_bp.handle_document_version_selection') }}"> <form method="POST" action="{{ url_for('document_bp.handle_document_version_selection') }}">
{{ render_selectable_table(headers=["ID", "URL", "File Loc.", "File Name", "File Type", "Process.", "Proces. Start", "Proces. Finish", "Proces. Error"], rows=rows, selectable=True, id="versionsTable") }} {{ render_selectable_table(headers=["ID", "URL", "Object Name", "File Type", "Process.", "Proces. Start", "Proces. Finish", "Proces. Error"], rows=rows, selectable=True, id="versionsTable") }}
<div class="form-group mt-3"> <div class="form-group mt-3">
<button type="submit" name="action" value="edit_document_version" class="btn btn-primary">Edit Document Version</button> <button type="submit" name="action" value="edit_document_version" class="btn btn-primary">Edit Document Version</button>
<button type="submit" name="action" value="process_document_version" class="btn btn-danger">Process Document Version</button> <button type="submit" name="action" value="process_document_version" class="btn btn-danger">Process Document Version</button>

View File

@@ -23,15 +23,23 @@
{{ render_collapsible_section('Filter', 'Filter Options', filter_form) }} {{ render_collapsible_section('Filter', 'Filter Options', filter_form) }}
<!-- Document Versions Table --> <div class="form-group mt-3">
{{ render_selectable_sortable_table( <form method="POST" action="{{ url_for('document_bp.handle_document_version_selection') }}">
headers=["ID", "File Type", "Processing", "Processing Start", "Processing Finish", "Processing Error"], <!-- Document Versions Table -->
rows=rows, {{ render_selectable_sortable_table(
selectable=True, headers=["ID", "File Type", "Processing", "Processing Start", "Processing Finish", "Processing Error"],
id="documentVersionsTable", rows=rows,
sort_by=sort_by, selectable=True,
sort_order=sort_order id="documentVersionsTable",
) }} sort_by=sort_by,
sort_order=sort_order
) }}
<div class="form-group mt-4">
<button type="submit" name="action" value="edit_document_version" class="btn btn-primary">Edit Document Version</button>
<button type="submit" name="action" value="process_document_version" class="btn btn-danger">Process Document Version</button>
</div>
</form>
</div>
{% endblock %} {% endblock %}
{% block content_footer %} {% block content_footer %}

View File

@@ -1,5 +1,5 @@
{% extends 'base.html' %} {% extends 'base.html' %}
{% from 'macros.html' import render_selectable_table, render_pagination %} {% from 'macros.html' import render_selectable_table, render_pagination, render_filter_field, render_date_filter_field, render_collapsible_section, render_selectable_sortable_table_with_dict_headers %}
{% block title %}Documents{% endblock %} {% block title %}Documents{% endblock %}
@@ -8,18 +8,88 @@
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %} {% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %} {% block content %}
<div class="container"> <!-- Filter Form -->
<form method="POST" action="{{ url_for('document_bp.handle_document_selection') }}"> {% set filter_form %}
{{ render_selectable_table(headers=["Document ID", "Name", "Valid From", "Valid To"], rows=rows, selectable=True, id="documentsTable") }} <form method="GET" action="{{ url_for('document_bp.documents') }}">
<div class="form-group mt-3"> {{ render_filter_field('catalog_id', 'Catalog', filter_options['catalog_id'], filters.get('catalog_id', [])) }}
<button type="submit" name="action" value="edit_document" class="btn btn-primary">Edit Document</button> {{ render_filter_field('validity', 'Validity', filter_options['validity'], filters.get('validity', [])) }}
<button type="submit" name="action" value="document_versions" class="btn btn-secondary">Show Document Versions</button>
<button type="submit" name="action" value="refresh_document" class="btn btn-secondary">Refresh Document (new version)</button> <button type="submit" class="btn btn-primary">Apply Filters</button>
</div> </form>
</form> {% endset %}
</div>
{{ render_collapsible_section('Filter', 'Filter Options', filter_form) }}
<div class="form-group mt-3">
<form method="POST" action="{{ url_for('document_bp.handle_document_selection') }}">
<!-- Documents Table -->
{{ render_selectable_sortable_table_with_dict_headers(
headers=[
{"text": "ID", "sort": "id"},
{"text": "Name", "sort": "name"},
{"text": "Catalog", "sort": "catalog_name"},
{"text": "Valid From", "sort": "valid_from"},
{"text": "Valid To", "sort": "valid_to"}
],
rows=rows,
selectable=True,
id="documentsTable",
sort_by=sort_by,
sort_order=sort_order
) }}
<div class="form-group mt-4">
<button type="submit" name="action" value="edit_document" class="btn btn-primary">Edit Document</button>
<button type="submit" name="action" value="document_versions" class="btn btn-secondary">Show Document Versions</button>
<button type="submit" name="action" value="refresh_document" class="btn btn-secondary">Refresh Document (new version)</button>
</div>
</form>
</div>
{% endblock %} {% endblock %}
{% block content_footer %} {% block content_footer %}
{{ render_pagination(pagination, 'document_bp.documents') }} {{ render_pagination(pagination, 'document_bp.documents') }}
{% endblock %} {% endblock %}
{% block scripts %}
<script>
document.addEventListener('DOMContentLoaded', function() {
const table = document.getElementById('documentsTable');
const headers = table.querySelectorAll('th.sortable');
headers.forEach(header => {
header.addEventListener('click', function() {
const sortBy = this.dataset.sort;
let sortOrder = 'asc';
if (this.querySelector('.fa-sort-up')) {
sortOrder = 'desc';
} else if (this.querySelector('.fa-sort-down')) {
sortOrder = 'none';
}
window.location.href = updateQueryStringParameter(window.location.href, 'sort_by', sortBy);
window.location.href = updateQueryStringParameter(window.location.href, 'sort_order', sortOrder);
});
});
function updateQueryStringParameter(uri, key, value) {
var re = new RegExp("([?&])" + key + "=.*?(&|$)", "i");
var separator = uri.indexOf('?') !== -1 ? "&" : "?";
if (uri.match(re)) {
return uri.replace(re, '$1' + key + "=" + value + '$2');
}
else {
return uri + separator + key + "=" + value;
}
}
table.addEventListener('change', function(event) {
if (event.target.type === 'radio') {
var selectedRow = event.target.closest('tr');
var documentId = selectedRow.cells[1].textContent;
console.log('Selected Document ID:', documentId);
}
});
});
</script>
{% endblock %}

View File

@@ -0,0 +1,35 @@
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{% block title %}Edit Catalog{% endblock %}
{% block content_title %}Edit Catalog{% endblock %}
{% block content_description %}Edit a catalog of documents in Evie's Library.
When you change chunking or embedding information, you'll need to manually refresh the library if you want immediate impact.
{% endblock %}
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{% set disabled_fields = ['type'] %}
{% set exclude_fields = [] %}
<!-- Render Static Fields -->
{% for field in form.get_static_fields() %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<!-- Render Dynamic Fields -->
{% for collection_name, fields in form.get_dynamic_fields().items() %}
{% if fields|length > 0 %}
<h4 class="mt-4">{{ collection_name }}</h4>
{% endif %}
{% for field in fields %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% endfor %}
<button type="submit" class="btn btn-primary">Save Retriever</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -8,11 +8,17 @@
{% block content %} {% block content %}
<form method="post"> <form method="post">
{{ form.hidden_tag() }} {{ form.hidden_tag() }}
{% set disabled_fields = [] %} {% set disabled_fields = [] %}
{% set exclude_fields = [] %} {% set exclude_fields = [] %}
{% for field in form %}
{{ render_field(field, disabled_fields, exclude_fields) }} {{ render_field(form.name, disabled_fields, exclude_fields) }}
{% endfor %} {{ render_field(form.valid_from, disabled_fields, exclude_fields) }}
{{ render_field(form.valid_to, disabled_fields, exclude_fields) }}
<div class="form-group">
<label for="catalog_name">Catalog</label>
<input type="text" class="form-control" id="catalog_name" value="{{ catalog_name }}" readonly>
</div>
<button type="submit" class="btn btn-primary">Update Document</button> <button type="submit" class="btn btn-primary">Update Document</button>
</form> </form>
{% endblock %} {% endblock %}

View File

@@ -0,0 +1,33 @@
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{% block title %}Edit Retriever{% endblock %}
{% block content_title %}Edit Retriever{% endblock %}
{% block content_description %}Edit a Retriever (for a Catalog){% endblock %}
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{% set disabled_fields = ['type'] %}
{% set exclude_fields = [] %}
<!-- Render Static Fields -->
{% for field in form.get_static_fields() %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<!-- Render Dynamic Fields -->
{% for collection_name, fields in form.get_dynamic_fields().items() %}
{% if fields|length > 0 %}
<h4 class="mt-4">{{ collection_name }}</h4>
{% endif %}
{% for field in fields %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% endfor %}
<button type="submit" class="btn btn-primary">Save Retriever</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -0,0 +1,23 @@
{% extends 'base.html' %}
{% from "macros.html" import render_field %}
{% block title %}Retriever Registration{% endblock %}
{% block content_title %}Register Retriever{% endblock %}
{% block content_description %}Define a new retriever (for a catalog){% endblock %}
{% block content %}
<form method="post">
{{ form.hidden_tag() }}
{% set disabled_fields = [] %}
{% set exclude_fields = [] %}
{% for field in form %}
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
<button type="submit" class="btn btn-primary">Register Retriever</button>
</form>
{% endblock %}
{% block content_footer %}
{% endblock %}

View File

@@ -0,0 +1,23 @@
{% extends 'base.html' %}
{% from 'macros.html' import render_selectable_table, render_pagination %}
{% block title %}Retrievers{% endblock %}
{% block content_title %}Retrievers{% endblock %}
{% block content_description %}View Retrievers for Tenant{% endblock %}
{% block content_class %}<div class="col-xl-12 col-lg-5 col-md-7 mx-auto"></div>{% endblock %}
{% block content %}
<div class="container">
<form method="POST" action="{{ url_for('document_bp.handle_retriever_selection') }}">
{{ render_selectable_table(headers=["Retriever ID", "Name", "Type", "Catalog ID"], rows=rows, selectable=True, id="retrieverssTable") }}
<div class="form-group mt-3">
<button type="submit" name="action" value="edit_retriever" class="btn btn-primary">Edit Retriever</button>
</div>
</form>
</div>
{% endblock %}
{% block content_footer %}
{{ render_pagination(pagination, 'document_bp.retrievers') }}
{% endblock %}

View File

@@ -0,0 +1,28 @@
{% extends 'base.html' %}
{% from "macros.html" import render_selectable_table, render_pagination %}
{% block title %}View License Usage{% endblock %}
{% block content_title %}View License Usage{% endblock %}
{% block content_description %}View License Usage{% endblock %}
{% block content %}
<form action="{{ url_for('user_bp.handle_user_action') }}" method="POST">
{{ render_selectable_table(headers=["Usage ID", "Start Date", "End Date", "Storage (MiB)", "Embedding (MiB)", "Interaction (tokens)"], rows=rows, selectable=False, id="usagesTable") }}
<!-- <div class="form-group mt-3">-->
<!-- <button type="submit" name="action" value="edit_user" class="btn btn-primary">Edit Selected User</button>-->
<!-- <button type="submit" name="action" value="resend_confirmation_email" class="btn btn-secondary">Resend Confirmation Email</button>-->
<!-- <button type="submit" name="action" value="send_password_reset_email" class="btn btn-secondary">Send Password Reset Email</button>-->
<!-- <button type="submit" name="action" value="reset_uniquifier" class="btn btn-secondary">Reset Uniquifier</button>-->
<!-- &lt;!&ndash; Additional buttons can be added here for other actions &ndash;&gt;-->
<!-- </div>-->
</form>
{% endblock %}
{% block content_footer %}
{{ render_pagination(pagination, 'user_bp.select_tenant') }}
{% endblock %}
{% block scripts %}
{% endblock %}

View File

@@ -1,5 +1,5 @@
<header class="header-2"> <header class="header-2">
<div class="page-header min-vh-25" style="background-image: url({{url_for('static', filename='/assets/img/EveAI_bg.jpg')}})" loading="lazy"> <div class="page-header min-vh-25" style="background-image: url({{url_for('static', filename='/assets/img/EveAI_bg.jpg')}}); background-position: top left; background-repeat: no-repeat; background-size: cover;" loading="lazy">
<span class="mask bg-gradient-primary opacity-4"></span> <span class="mask bg-gradient-primary opacity-4"></span>
<div class="container"> <div class="container">
<div class="row"> <div class="row">

View File

@@ -54,7 +54,7 @@
{% if embedding.url %} {% if embedding.url %}
<a href="{{ embedding.url }}" target="_blank">{{ embedding.url }}</a> <a href="{{ embedding.url }}" target="_blank">{{ embedding.url }}</a>
{% else %} {% else %}
{{ embedding.file_name }} {{ embedding.object_name }}
{% endif %} {% endif %}
</li> </li>
{% endfor %} {% endfor %}

View File

@@ -1,18 +1,136 @@
{% macro render_field(field, disabled_fields=[], exclude_fields=[], class='') %} <!--{% macro render_field(field, disabled_fields=[], exclude_fields=[], class='') %}-->
<!-- {% set disabled = field.name in disabled_fields %}-->
<!-- {% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %}-->
<!-- {% if field.name not in exclude_fields %}-->
<!-- {% if field.type == 'BooleanField' %}-->
<!-- <div class="form-check">-->
<!-- {{ field(class="form-check-input " + class, type="checkbox", id="flexSwitchCheckDefault") }}-->
<!-- {{ field.label(class="form-check-label", for="flexSwitchCheckDefault", disabled=disabled) }}-->
<!-- </div>-->
<!-- {% else %}-->
<!-- <div class="form-group">-->
<!-- {{ field.label(class="form-label") }}-->
<!-- {{ field(class="form-control " + class, disabled=disabled) }}-->
<!-- {% if field.errors %}-->
<!-- <div class="invalid-feedback">-->
<!-- {% for error in field.errors %}-->
<!-- {{ error }}-->
<!-- {% endfor %}-->
<!-- </div>-->
<!-- {% endif %}-->
<!-- </div>-->
<!-- {% endif %}-->
<!-- {% endif %}-->
<!--{% endmacro %}-->
{% macro render_field_old(field, disabled_fields=[], exclude_fields=[], class='') %}
<!-- Debug info -->
<!-- Field name: {{ field.name }}, Field type: {{ field.__class__.__name__ }} -->
{% set disabled = field.name in disabled_fields %} {% set disabled = field.name in disabled_fields %}
{% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %} {% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %}
{% if field.name not in exclude_fields %} {% if field.name not in exclude_fields %}
{% if field.type == 'BooleanField' %} {% if field.type == 'BooleanField' %}
<div class="form-check"> <div class="form-group">
{{ field(class="form-check-input " + class, type="checkbox", id="flexSwitchCheckDefault") }} <div class="form-check form-switch">
{{ field.label(class="form-check-label", for="flexSwitchCheckDefault", disabled=disabled) }} {{ field(class="form-check-input " + class, disabled=disabled) }}
{% if field.description %}
{{ field.label(class="form-check-label",
**{'data-bs-toggle': 'tooltip',
'data-bs-placement': 'right',
'title': field.description}) }}
{% else %}
{{ field.label(class="form-check-label") }}
{% endif %}
</div>
{% if field.errors %}
<div class="invalid-feedback d-block">
{% for error in field.errors %}
{{ error }}
{% endfor %}
</div>
{% endif %}
</div> </div>
{% else %} {% else %}
<div class="form-group"> <div class="form-group">
{{ field.label(class="form-label") }} {% if field.description %}
{{ field(class="form-control " + class, disabled=disabled) }} {{ field.label(class="form-label",
**{'data-bs-toggle': 'tooltip',
'data-bs-placement': 'right',
'title': field.description}) }}
{% else %}
{{ field.label(class="form-label") }}
{% endif %}
{% if field.type == 'TextAreaField' and 'json-editor' in class %}
<div id="{{ field.id }}-editor" class="json-editor-container"></div>
{{ field(class="form-control d-none " + class, disabled=disabled) }}
{% else %}
{{ field(class="form-control " + class, disabled=disabled) }}
{% endif %}
{% if field.errors %} {% if field.errors %}
<div class="invalid-feedback"> <div class="invalid-feedback d-block">
{% for error in field.errors %}
{{ error }}
{% endfor %}
</div>
{% endif %}
</div>
{% endif %}
{% endif %}
{% endmacro %}
{% macro render_field(field, disabled_fields=[], exclude_fields=[], class='') %}
<!-- Debug info -->
<!-- Field name: {{ field.name }}, Field type: {{ field.__class__.__name__ }} -->
{% set disabled = field.name in disabled_fields %}
{% set exclude_fields = exclude_fields + ['csrf_token', 'submit'] %}
{% if field.name not in exclude_fields %}
{% if field.type == 'BooleanField' %}
<div class="form-group">
<div class="form-check form-switch">
{{ field(class="form-check-input " + class, disabled=disabled) }}
{% if field.description %}
{{ field.label(class="form-check-label",
**{'data-bs-toggle': 'tooltip',
'data-bs-placement': 'right',
'title': field.description}) }}
{% else %}
{{ field.label(class="form-check-label") }}
{% endif %}
</div>
{% if field.errors %}
<div class="invalid-feedback d-block">
{% for error in field.errors %}
{{ error }}
{% endfor %}
</div>
{% endif %}
</div>
{% else %}
<div class="form-group">
{% if field.description %}
{{ field.label(class="form-label",
**{'data-bs-toggle': 'tooltip',
'data-bs-placement': 'right',
'title': field.description}) }}
{% else %}
{{ field.label(class="form-label") }}
{% endif %}
{% if field.type == 'TextAreaField' and 'json-editor' in class %}
<div id="{{ field.id }}-editor" class="json-editor-container"></div>
{{ field(class="form-control d-none " + class, disabled=disabled) }}
{% elif field.type == 'SelectField' %}
{{ field(class="form-control form-select " + class, disabled=disabled) }}
{% else %}
{{ field(class="form-control " + class, disabled=disabled) }}
{% endif %}
{% if field.errors %}
<div class="invalid-feedback d-block">
{% for error in field.errors %} {% for error in field.errors %}
{{ error }} {{ error }}
{% endfor %} {% endfor %}
@@ -177,6 +295,48 @@
</div> </div>
{% endmacro %} {% endmacro %}
{% macro render_selectable_sortable_table_with_dict_headers(headers, rows, selectable, id, sort_by, sort_order) %}
<div class="card">
<div class="table-responsive">
<table class="table align-items-center mb-0" id="{{ id }}">
<thead>
<tr>
{% if selectable %}
<th class="text-uppercase text-secondary text-xxs font-weight-bolder opacity-7">Select</th>
{% endif %}
{% for header in headers %}
<th class="text-uppercase text-secondary text-xxs font-weight-bolder opacity-7 sortable" data-sort="{{ header['sort'] }}">
{{ header['text'] }}
{% if sort_by == header['sort'] %}
{% if sort_order == 'asc' %}
<i class="fas fa-sort-up"></i>
{% elif sort_order == 'desc' %}
<i class="fas fa-sort-down"></i>
{% endif %}
{% else %}
<i class="fas fa-sort"></i>
{% endif %}
</th>
{% endfor %}
</tr>
</thead>
<tbody>
{% for row in rows %}
<tr>
{% if selectable %}
<td><input type="radio" name="selected_row" value="{{ row[0].value }}"></td>
{% endif %}
{% for cell in row %}
<td>{{ cell.value }}</td>
{% endfor %}
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endmacro %}
{% macro render_accordion(accordion_id, accordion_items, header_title, header_description) %} {% macro render_accordion(accordion_id, accordion_items, header_title, header_description) %}
<div class="accordion-1"> <div class="accordion-1">
<div class="container"> <div class="container">

View File

@@ -81,6 +81,10 @@
{% endif %} {% endif %}
{% if current_user.is_authenticated %} {% if current_user.is_authenticated %}
{{ dropdown('Document Mgmt', 'note_stack', [ {{ dropdown('Document Mgmt', 'note_stack', [
{'name': 'Add Catalog', 'url': '/document/catalog', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'All Catalogs', 'url': '/document/catalogs', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add Retriever', 'url': '/document/retriever', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'All Retrievers', 'url': '/document/retrievers', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']}, {'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']}, {'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
{'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']}, {'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']},
@@ -98,6 +102,8 @@
{{ dropdown('Administration', 'settings', [ {{ dropdown('Administration', 'settings', [
{'name': 'License Tier Registration', 'url': '/entitlements/license_tier', 'roles': ['Super User']}, {'name': 'License Tier Registration', 'url': '/entitlements/license_tier', 'roles': ['Super User']},
{'name': 'All License Tiers', 'url': '/entitlements/view_license_tiers', 'roles': ['Super User']}, {'name': 'All License Tiers', 'url': '/entitlements/view_license_tiers', 'roles': ['Super User']},
{'name': 'Trigger Actions', 'url': '/administration/trigger_actions', 'roles': ['Super User']},
{'name': 'Usage', 'url': '/entitlements/view_usages', 'roles': ['Super User', 'Tenant Admin']},
]) }} ]) }}
{% endif %} {% endif %}
{% if current_user.is_authenticated %} {% if current_user.is_authenticated %}
@@ -112,6 +118,17 @@
{% endif %} {% endif %}
</ul> </ul>
{% if current_user.is_authenticated %} {% if current_user.is_authenticated %}
<ul class="navbar-nav d-lg-block d-none">
<li class="nav-item">
<a href="/document/catalogs" class="btn btn-sm bg-gradient-primary mb-0 me-2">
{% if 'catalog_name' in session %}
CATALOG: {{ session['catalog_name'] }}
{% else %}
CHOOSE CATALOG
{% endif %}
</a>
</li>
</ul>
<ul class="navbar-nav d-lg-block d-none"> <ul class="navbar-nav d-lg-block d-none">
<li class="nav-item"> <li class="nav-item">
<a href="/session_defaults" class="btn btn-sm bg-gradient-primary mb-0"> <a href="/session_defaults" class="btn btn-sm bg-gradient-primary mb-0">

View File

@@ -14,4 +14,58 @@
<script src="{{url_for('static', filename='assets/js/material-kit-pro.min.js')}}?v=3.0.4 type="text/javascript"></script> <script src="{{url_for('static', filename='assets/js/material-kit-pro.min.js')}}?v=3.0.4 type="text/javascript"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.3/js/bootstrap.bundle.min.js"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.3.3/js/bootstrap.bundle.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/select2/4.0.13/js/select2.min.js"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/select2/4.0.13/js/select2.min.js"></script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/jsoneditor/10.1.0/jsoneditor.min.css" rel="stylesheet" type="text/css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/jsoneditor/10.1.0/jsoneditor.min.js"></script>
<script>
document.addEventListener('DOMContentLoaded', function() {
// Initialize tooltips
var tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'))
var tooltipList = tooltipTriggerList.map(function (tooltipTriggerEl) {
return new bootstrap.Tooltip(tooltipTriggerEl)
});
// Initialize JSON editors
document.querySelectorAll('.json-editor').forEach(function(textarea) {
// Create container for editor
var container = document.getElementById(textarea.id + '-editor');
// Initialize the editor
var editor = new JSONEditor(container, {
mode: 'code',
modes: ['code', 'tree'],
onChangeText: function(jsonString) {
textarea.value = jsonString;
}
});
// Set initial value
try {
const initialValue = textarea.value ? JSON.parse(textarea.value) : {};
editor.set(initialValue);
} catch (e) {
console.error('Error parsing initial JSON:', e);
editor.set({});
}
// Add validation indicator
editor.validate().then(function(errors) {
if (errors.length) {
container.style.border = '2px solid red';
} else {
container.style.border = '1px solid #ccc';
}
});
});
});
</script>
<style>
.json-editor-container {
height: 400px;
margin-bottom: 1rem;
}
.tooltip {
position: fixed;
}
</style>

View File

@@ -10,7 +10,7 @@
<form method="post"> <form method="post">
{{ form.hidden_tag() }} {{ form.hidden_tag() }}
<!-- Main Tenant Information --> <!-- Main Tenant Information -->
{% set main_fields = ['name', 'website', 'default_language', 'allowed_languages', 'rag_context', 'type'] %} {% set main_fields = ['name', 'website', 'default_language', 'allowed_languages', 'timezone','rag_context', 'type'] %}
{% for field in form %} {% for field in form %}
{{ render_included_field(field, disabled_fields=[], include_fields=main_fields) }} {{ render_included_field(field, disabled_fields=[], include_fields=main_fields) }}
{% endfor %} {% endfor %}
@@ -30,21 +30,6 @@
License Information License Information
</a> </a>
</li> </li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#chunking-tab" role="tab" aria-controls="chunking" aria-selected="false">
Chunking
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#embedding-search-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
Embedding Search
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#tuning-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
Tuning
</a>
</li>
</ul> </ul>
</div> </div>
<div class="tab-content tab-space"> <div class="tab-content tab-space">
@@ -62,8 +47,10 @@
{{ render_included_field(field, disabled_fields=[], include_fields=license_fields) }} {{ render_included_field(field, disabled_fields=[], include_fields=license_fields) }}
{% endfor %} {% endfor %}
<!-- Register API Key Button --> <!-- Register API Key Button -->
<button type="button" class="btn btn-primary" onclick="generateNewChatApiKey()">Register Chat API Key</button> <div class="form-group">
<button type="button" class="btn btn-primary" onclick="generateNewApiKey()">Register API Key</button> <button type="button" class="btn btn-primary" onclick="generateNewChatApiKey()">Register Chat API Key</button>
<button type="button" class="btn btn-primary" onclick="generateNewApiKey()">Register API Key</button>
</div>
<!-- API Key Display Field --> <!-- API Key Display Field -->
<div id="chat-api-key-field" style="display:none;"> <div id="chat-api-key-field" style="display:none;">
<label for="chat-api-key">Chat API Key:</label> <label for="chat-api-key">Chat API Key:</label>
@@ -78,27 +65,6 @@
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p> <p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
</div> </div>
</div> </div>
<!-- Chunking Settings Tab -->
<div class="tab-pane fade" id="chunking-tab" role="tabpanel">
{% set html_fields = ['html_tags', 'html_end_tags', 'html_included_elements', 'html_excluded_elements', 'html_excluded_classes', 'min_chunk_size', 'max_chunk_size'] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=[], include_fields=html_fields) }}
{% endfor %}
</div>
<!-- Embedding Search Settings Tab -->
<div class="tab-pane fade" id="embedding-search-tab" role="tabpanel">
{% set es_fields = ['es_k', 'es_similarity_threshold', ] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=[], include_fields=es_fields) }}
{% endfor %}
</div>
<!-- Tuning Settings Tab -->
<div class="tab-pane fade" id="tuning-tab" role="tabpanel">
{% set tuning_fields = ['embed_tuning', 'rag_tuning', ] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=[], include_fields=tuning_fields) }}
{% endfor %}
</div>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -30,21 +30,6 @@
License Information License Information
</a> </a>
</li> </li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#chunking-tab" role="tab" aria-controls="chunking" aria-selected="false">
Chunking
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#embedding-search-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
Embedding Search
</a>
</li>
<li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#tuning-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
Tuning
</a>
</li>
</ul> </ul>
</div> </div>
<div class="tab-content tab-space"> <div class="tab-content tab-space">
@@ -78,27 +63,6 @@
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p> <p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
</div> </div>
</div> </div>
<!-- Chunking Settings Tab -->
<div class="tab-pane fade" id="chunking-tab" role="tabpanel">
{% set html_fields = ['html_tags', 'html_end_tags', 'html_included_elements', 'html_excluded_elements', 'html_excluded_classes', 'min_chunk_size', 'max_chunk_size'] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=html_fields, include_fields=html_fields) }}
{% endfor %}
</div>
<!-- Embedding Search Settings Tab -->
<div class="tab-pane fade" id="embedding-search-tab" role="tabpanel">
{% set es_fields = ['es_k', 'es_similarity_threshold', ] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=es_fields, include_fields=es_fields) }}
{% endfor %}
</div>
<!-- Tuning Settings Tab -->
<div class="tab-pane fade" id="tuning-tab" role="tabpanel">
{% set tuning_fields = ['embed_tuning', 'rag_tuning', ] %}
{% for field in form %}
{{ render_included_field(field, disabled_fields=tuning_fields, include_fields=tuning_fields) }}
{% endfor %}
</div>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -0,0 +1,7 @@
from flask import current_app
from flask_wtf import FlaskForm
from wtforms.fields.simple import SubmitField
class TriggerActionForm(FlaskForm):
submit = SubmitField('Submit')

View File

@@ -0,0 +1,39 @@
import uuid
from datetime import datetime as dt, timezone as tz
from flask import request, redirect, flash, render_template, Blueprint, session, current_app, jsonify
from flask_security import hash_password, roles_required, roles_accepted, current_user
from itsdangerous import URLSafeTimedSerializer
from sqlalchemy.exc import SQLAlchemyError
from common.utils.celery_utils import current_celery
from common.utils.view_assistants import prepare_table_for_macro, form_validation_failed
from common.utils.nginx_utils import prefixed_url_for
from .administration_forms import TriggerActionForm
administration_bp = Blueprint('administration_bp', __name__, url_prefix='/administration')
@administration_bp.route('/trigger_actions', methods=['GET'])
@roles_accepted('Super User')
def trigger_actions():
form = TriggerActionForm()
return render_template('administration/trigger_actions.html', form=form)
@administration_bp.route('/handle_trigger_action', methods=['POST'])
@roles_accepted('Super User')
def handle_trigger_action():
action = request.form['action']
match action:
case 'update_usages':
try:
# Use send_task to trigger the task since it's part of another component (eveai_entitlements)
task = current_celery.send_task('update_usages', queue='entitlements')
current_app.logger.info(f"Usage update task triggered: {task.id}")
flash('Usage update task has been triggered successfully!', 'success')
except Exception as e:
current_app.logger.error(f"Failed to trigger usage update task: {str(e)}")
flash(f'Failed to trigger usage update: {str(e)}', 'danger')
return redirect(prefixed_url_for('administration_bp.trigger_actions'))

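Only the task name 'update_usages' and the 'entitlements' queue are fixed by the send_task call above; a sketch of what the receiving side in eveai_entitlements could look like (module location and body are assumptions):

from celery import shared_task

@shared_task(name='update_usages')
def update_usages():
    # Recalculate license usage for all tenants; the real implementation lives in
    # the eveai_entitlements component. Its worker must consume the 'entitlements'
    # queue (e.g. started with -Q entitlements) for the trigger to reach it.
    ...
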
View File

@@ -1,11 +1,20 @@
from flask import session, current_app from flask import session, current_app, request
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from wtforms import (StringField, BooleanField, SubmitField, DateField, from wtforms import (StringField, BooleanField, SubmitField, DateField, IntegerField, FloatField, SelectMultipleField,
SelectField, FieldList, FormField, TextAreaField, URLField) SelectField, FieldList, FormField, TextAreaField, URLField)
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError, NumberRange
from flask_wtf.file import FileField, FileAllowed, FileRequired from flask_wtf.file import FileField, FileAllowed, FileRequired
import json import json
from wtforms_sqlalchemy.fields import QuerySelectField
from common.extensions import db
from common.models.document import Catalog
from config.catalog_types import CATALOG_TYPES
from config.retriever_types import RETRIEVER_TYPES
from .dynamic_form_base import DynamicFormBase
def allowed_file(form, field): def allowed_file(form, field):
if field.data: if field.data:
@@ -23,7 +32,126 @@ def validate_json(form, field):
raise ValidationError('Invalid JSON format') raise ValidationError('Invalid JSON format')
class AddDocumentForm(FlaskForm): class CatalogForm(FlaskForm):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Select Field for Catalog Type (Uses the CATALOG_TYPES defined in config)
type = SelectField('Catalog Type', validators=[DataRequired()])
# Metadata fields
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
configuration = TextAreaField('Configuration', validators=[Optional(), validate_json])
# HTML Embedding Variables
html_tags = StringField('HTML Tags', validators=[DataRequired()],
default='p, h1, h2, h3, h4, h5, h6, li, , tbody, tr, td')
html_end_tags = StringField('HTML End Tags', validators=[DataRequired()],
default='p, li')
html_included_elements = StringField('HTML Included Elements', validators=[Optional()], default='article, main')
html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()],
default='header, footer, nav, script')
html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])
min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()],
default=2000)
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
default=3000)
# Chat Variables
chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)])
chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)])
# Tuning variables
embed_tuning = BooleanField('Enable Embedding Tuning', default=False)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Dynamically populate the 'type' field using the constructor
self.type.choices = [(key, value['name']) for key, value in CATALOG_TYPES.items()]
class EditCatalogForm(DynamicFormBase):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Select Field for Catalog Type (Uses the CATALOG_TYPES defined in config)
type = StringField('Catalog Type', validators=[DataRequired()], render_kw={'readonly': True})
# Metadata fields
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json],)
# HTML Embedding Variables
html_tags = StringField('HTML Tags', validators=[DataRequired()],
default='p, h1, h2, h3, h4, h5, h6, li, , tbody, tr, td')
html_end_tags = StringField('HTML End Tags', validators=[DataRequired()],
default='p, li')
html_included_elements = StringField('HTML Included Elements', validators=[Optional()], default='article, main')
html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()],
default='header, footer, nav, script')
html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])
min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()],
default=2000)
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()],
default=3000)
# Chat Variables
chat_RAG_temperature = FloatField('RAG Temperature', default=0.3, validators=[NumberRange(min=0, max=1)])
chat_no_RAG_temperature = FloatField('No RAG Temperature', default=0.5, validators=[NumberRange(min=0, max=1)])
# Tuning variables
embed_tuning = BooleanField('Enable Embedding Tuning', default=False)
class RetrieverForm(FlaskForm):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Catalog for the Retriever
catalog = QuerySelectField(
'Catalog ID',
query_factory=lambda: Catalog.query.all(),
allow_blank=True,
get_label='name',
validators=[Optional()],
)
# Select Field for Retriever Type (Uses the RETRIEVER_TYPES defined in config)
type = SelectField('Retriever Type', validators=[DataRequired()])
tuning = BooleanField('Enable Tuning', default=False)
# Metadata fields
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Dynamically populate the 'type' field using the constructor
self.type.choices = [(key, value['name']) for key, value in RETRIEVER_TYPES.items()]
class EditRetrieverForm(DynamicFormBase):
name = StringField('Name', validators=[DataRequired(), Length(max=50)])
description = TextAreaField('Description', validators=[Optional()])
# Catalog for the Retriever
catalog = QuerySelectField(
'Catalog ID',
query_factory=lambda: Catalog.query.all(),
allow_blank=True,
get_label='name',
validators=[Optional()],
)
# Select Field for Retriever Type (Uses the RETRIEVER_TYPES defined in config)
type = SelectField('Retriever Type', validators=[DataRequired()], render_kw={'readonly': True})
tuning = BooleanField('Enable Tuning', default=False)
# Metadata fields
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
system_metadata = TextAreaField('System Metadata', validators=[Optional(), validate_json])
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Set the retriever type choices (loaded from config)
self.type.choices = [(key, value['name']) for key, value in RETRIEVER_TYPES.items()]
class AddDocumentForm(DynamicFormBase):
file = FileField('File', validators=[FileRequired(), allowed_file]) file = FileField('File', validators=[FileRequired(), allowed_file])
name = StringField('Name', validators=[Length(max=100)]) name = StringField('Name', validators=[Length(max=100)])
language = SelectField('Language', choices=[], validators=[Optional()]) language = SelectField('Language', choices=[], validators=[Optional()])
@@ -31,17 +159,15 @@ class AddDocumentForm(FlaskForm):
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()]) valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json]) user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
submit = SubmitField('Submit') def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self):
super().__init__()
self.language.choices = [(language, language) for language in self.language.choices = [(language, language) for language in
session.get('tenant').get('allowed_languages')] session.get('tenant').get('allowed_languages')]
if not self.language.data: if not self.language.data:
self.language.data = session.get('tenant').get('default_language') self.language.data = session.get('tenant').get('default_language')
class AddURLForm(FlaskForm): class AddURLForm(DynamicFormBase):
url = URLField('URL', validators=[DataRequired(), URL()]) url = URLField('URL', validators=[DataRequired(), URL()])
name = StringField('Name', validators=[Length(max=100)]) name = StringField('Name', validators=[Length(max=100)])
language = SelectField('Language', choices=[], validators=[Optional()]) language = SelectField('Language', choices=[], validators=[Optional()])
@@ -49,10 +175,8 @@ class AddURLForm(FlaskForm):
valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()]) valid_from = DateField('Valid from', id='form-control datepicker', validators=[Optional()])
user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json]) user_metadata = TextAreaField('User Metadata', validators=[Optional(), validate_json])
submit = SubmitField('Submit') def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self):
super().__init__()
self.language.choices = [(language, language) for language in self.language.choices = [(language, language) for language in
session.get('tenant').get('allowed_languages')] session.get('tenant').get('allowed_languages')]
if not self.language.data: if not self.language.data:
@@ -84,7 +208,7 @@ class EditDocumentForm(FlaskForm):
submit = SubmitField('Submit') submit = SubmitField('Submit')
class EditDocumentVersionForm(FlaskForm): class EditDocumentVersionForm(DynamicFormBase):
language = StringField('Language') language = StringField('Language')
user_context = TextAreaField('User Context', validators=[Optional()]) user_context = TextAreaField('User Context', validators=[Optional()])
system_context = TextAreaField('System Context', validators=[Optional()]) system_context = TextAreaField('System Context', validators=[Optional()])

View File

@@ -0,0 +1,102 @@
from datetime import datetime
from flask import request, render_template, session
from sqlalchemy import desc, asc, or_, and_, cast, Integer
from common.models.document import Document, Catalog
from common.utils.filtered_list_view import FilteredListView
from common.utils.view_assistants import prepare_table_for_macro
class DocumentListView(FilteredListView):
allowed_filters = ['catalog_id', 'validity']
allowed_sorts = ['id', 'name', 'catalog_name', 'valid_from', 'valid_to']
def get_query(self):
return Document.query.join(Catalog).add_columns(
Document.id,
Document.name,
Catalog.name.label('catalog_name'),
Document.valid_from,
Document.valid_to
)
def apply_filters(self, query):
filters = request.args.to_dict(flat=False)
if 'catalog_id' in filters:
catalog_ids = filters['catalog_id']
if catalog_ids:
# Convert catalog_ids to a list of integers
catalog_ids = [int(cid) for cid in catalog_ids if cid.isdigit()]
if catalog_ids:
query = query.filter(Document.catalog_id.in_(catalog_ids))
if 'validity' in filters:
now = datetime.utcnow().date()
if 'valid' in filters['validity']:
query = query.filter(
and_(
or_(Document.valid_from.is_(None), Document.valid_from <= now),
or_(Document.valid_to.is_(None), Document.valid_to >= now)
)
)
return query
def apply_sorting(self, query):
sort_by = request.args.get('sort_by', 'id')
sort_order = request.args.get('sort_order', 'asc')
if sort_by in self.allowed_sorts:
if sort_by == 'catalog_name':
column = Catalog.name
else:
column = getattr(Document, sort_by)
if sort_order == 'asc':
query = query.order_by(asc(column))
elif sort_order == 'desc':
query = query.order_by(desc(column))
return query
def get(self):
query = self.get_query()
query = self.apply_filters(query)
query = self.apply_sorting(query)
pagination = self.paginate(query)
def format_date(date):
if isinstance(date, datetime):
return date.strftime('%Y-%m-%d')
elif isinstance(date, str):
return date
else:
return ''
rows = [
[
{'value': item.id, 'class': '', 'type': 'text'},
{'value': item.name, 'class': '', 'type': 'text'},
{'value': item.catalog_name, 'class': '', 'type': 'text'},
{'value': format_date(item.valid_from), 'class': '', 'type': 'text'},
{'value': format_date(item.valid_to), 'class': '', 'type': 'text'}
] for item in pagination.items
]
catalogs = Catalog.query.all()
context = {
'rows': rows,
'pagination': pagination,
'filters': request.args.to_dict(flat=False),
'sort_by': request.args.get('sort_by', 'id'),
'sort_order': request.args.get('sort_order', 'asc'),
'filter_options': self.get_filter_options(catalogs)
}
return render_template(self.template, **context)
def get_filter_options(self, catalogs):
return {
'catalog_id': [(str(cat.id), cat.name) for cat in catalogs],
'validity': [('valid', 'Valid'), ('all', 'All')]
}
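Since DocumentListView pulls filters, sorting and paging straight from request.args, the documents page is driven entirely by the query string. An illustrative request using the allowed_filters/allowed_sorts above (the catalog_id value is invented):

# GET /document/documents?catalog_id=3&validity=valid&sort_by=name&sort_order=desc&page=1
# The route simply delegates to the view:
view = DocumentListView(Document, 'document/documents.html', per_page=10)
response = view.get()   # applies the filters, then sorting, then pagination, then renders the template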

View File

@@ -12,7 +12,7 @@ class DocumentVersionListView(FilteredListView):
allowed_sorts = ['id', 'processing_started_at', 'processing_finished_at', 'processing_error'] allowed_sorts = ['id', 'processing_started_at', 'processing_finished_at', 'processing_error']
def get_query(self): def get_query(self):
return DocumentVersion.query.join(Document).filter(Document.tenant_id == session.get('tenant', {}).get('id')) return DocumentVersion.query.join(Document)
def apply_filters(self, query): def apply_filters(self, query):
filters = request.args.to_dict() filters = request.args.to_dict()

View File

@@ -1,9 +1,11 @@
import ast import ast
from datetime import datetime as dt, timezone as tz from datetime import datetime as dt, timezone as tz
from babel.messages.setuptools_frontend import update_catalog
from flask import request, redirect, flash, render_template, Blueprint, session, current_app from flask import request, redirect, flash, render_template, Blueprint, session, current_app
from flask_security import roles_accepted, current_user from flask_security import roles_accepted, current_user
from sqlalchemy import desc from sqlalchemy import desc
from sqlalchemy.orm import aliased
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
import requests import requests
@@ -12,19 +14,23 @@ from urllib.parse import urlparse, unquote
import io import io
import json import json
from common.models.document import Document, DocumentVersion from common.models.document import Document, DocumentVersion, Catalog, Retriever
from common.extensions import db, minio_client from common.extensions import db, minio_client
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \ from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
process_multiple_urls, get_documents_list, edit_document, \ process_multiple_urls, get_documents_list, edit_document, \
edit_document_version, refresh_document edit_document_version, refresh_document
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \ from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
EveAIDoubleURLException EveAIDoubleURLException
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm, \
CatalogForm, EditCatalogForm, RetrieverForm, EditRetrieverForm
from common.utils.middleware import mw_before_request from common.utils.middleware import mw_before_request
from common.utils.celery_utils import current_celery from common.utils.celery_utils import current_celery
from common.utils.nginx_utils import prefixed_url_for from common.utils.nginx_utils import prefixed_url_for
from common.utils.view_assistants import form_validation_failed, prepare_table_for_macro, form_to_dict from common.utils.view_assistants import form_validation_failed, prepare_table_for_macro, form_to_dict
from .document_list_view import DocumentListView
from .document_version_list_view import DocumentVersionListView from .document_version_list_view import DocumentVersionListView
from config.catalog_types import CATALOG_TYPES
from config.retriever_types import RETRIEVER_TYPES
document_bp = Blueprint('document_bp', __name__, url_prefix='/document') document_bp = Blueprint('document_bp', __name__, url_prefix='/document')
@@ -52,13 +58,260 @@ def before_request():
raise raise
@document_bp.route('/catalog', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def catalog():
form = CatalogForm()
if form.validate_on_submit():
tenant_id = session.get('tenant').get('id')
new_catalog = Catalog()
form.populate_obj(new_catalog)
# Handle Embedding Variables
new_catalog.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
new_catalog.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
if form.html_end_tags.data else []
new_catalog.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
if form.html_included_elements.data else []
new_catalog.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
if form.html_excluded_elements.data else []
new_catalog.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \
if form.html_excluded_classes.data else []
set_logging_information(new_catalog, dt.now(tz.utc))
try:
db.session.add(new_catalog)
db.session.commit()
flash('Catalog successfully added!', 'success')
current_app.logger.info(f'Catalog {new_catalog.name} successfully added for tenant {tenant_id}!')
except SQLAlchemyError as e:
db.session.rollback()
flash(f'Failed to add catalog. Error: {e}', 'danger')
current_app.logger.error(f'Failed to add catalog {new_catalog.name} '
f'for tenant {tenant_id}. Error: {str(e)}')
return render_template('document/catalog.html', form=form)
@document_bp.route('/catalogs', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def catalogs():
page = request.args.get('page', 1, type=int)
per_page = request.args.get('per_page', 10, type=int)
query = Catalog.query.order_by(Catalog.id)
pagination = query.paginate(page=page, per_page=per_page)
the_catalogs = pagination.items
# prepare table data
rows = prepare_table_for_macro(the_catalogs, [('id', ''), ('name', ''), ('type', '')])
# Render the catalogs in a template
return render_template('document/catalogs.html', rows=rows, pagination=pagination)
@document_bp.route('/handle_catalog_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_catalog_selection():
catalog_identification = request.form.get('selected_row')
catalog_id = ast.literal_eval(catalog_identification).get('value')
action = request.form['action']
catalog = Catalog.query.get_or_404(catalog_id)
if action == 'set_session_catalog':
current_app.logger.info(f'Setting session catalog to {catalog.name}')
session['catalog_id'] = catalog_id
session['catalog_name'] = catalog.name
current_app.logger.info(f'Finished setting session catalog to {catalog.name}')
elif action == 'edit_catalog':
return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))
return redirect(prefixed_url_for('document_bp.catalogs'))
@document_bp.route('/catalog/<int:catalog_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_catalog(catalog_id):
catalog = Catalog.query.get_or_404(catalog_id)
tenant_id = session.get('tenant').get('id')
form = EditCatalogForm(request.form, obj=catalog)
configuration_config = CATALOG_TYPES[catalog.type]["configuration"]
form.add_dynamic_fields("configuration", configuration_config, catalog.configuration)
# Convert arrays to comma-separated strings for display
if request.method == 'GET':
form.html_tags.data = ', '.join(catalog.html_tags or '')
form.html_end_tags.data = ', '.join(catalog.html_end_tags or '')
form.html_included_elements.data = ', '.join(catalog.html_included_elements or '')
form.html_excluded_elements.data = ', '.join(catalog.html_excluded_elements or '')
form.html_excluded_classes.data = ', '.join(catalog.html_excluded_classes or '')
if request.method == 'POST' and form.validate_on_submit():
form.populate_obj(catalog)
# Handle Embedding Variables
catalog.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
catalog.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
if form.html_end_tags.data else []
catalog.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
if form.html_included_elements.data else []
catalog.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
if form.html_excluded_elements.data else []
catalog.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \
if form.html_excluded_classes.data else []
catalog.configuration = form.get_dynamic_data('configuration')
update_logging_information(catalog, dt.now(tz.utc))
try:
db.session.add(catalog)
db.session.commit()
flash('Catalog updated successfully!', 'success')
current_app.logger.info(f'Catalog {catalog.name} successfully updated for tenant {tenant_id}')
except SQLAlchemyError as e:
db.session.rollback()
flash(f'Failed to update catalog. Error: {e}', 'danger')
current_app.logger.error(f'Failed to update catalog {catalog_id} for tenant {tenant_id}. Error: {str(e)}')
return redirect(prefixed_url_for('document_bp.catalogs'))
else:
form_validation_failed(request, form)
return render_template('document/edit_catalog.html', form=form, catalog_id=catalog_id)
@document_bp.route('/retriever', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def retriever():
form = RetrieverForm()
if form.validate_on_submit():
tenant_id = session.get('tenant').get('id')
new_retriever = Retriever()
form.populate_obj(new_retriever)
new_retriever.catalog_id = form.catalog.data.id
set_logging_information(new_retriever, dt.now(tz.utc))
try:
db.session.add(new_retriever)
db.session.commit()
flash('Retriever successfully added!', 'success')
current_app.logger.info(f'Retriever {new_retriever.name} successfully added for tenant {tenant_id}!')
except SQLAlchemyError as e:
db.session.rollback()
flash(f'Failed to add retriever. Error: {e}', 'danger')
current_app.logger.error(f'Failed to add retriever {new_retriever.name} '
f'for tenant {tenant_id}. Error: {str(e)}')
# Enable step 2 of creation of retriever - add configuration of the retriever (dependent on type)
return redirect(prefixed_url_for('document_bp.edit_retriever', retriever_id=new_retriever.id))
return render_template('document/retriever.html', form=form)
@document_bp.route('/retriever/<int:retriever_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def edit_retriever(retriever_id):
"""Edit an existing retriever configuration."""
# Get the retriever or return 404
retriever = Retriever.query.get_or_404(retriever_id)
if retriever.catalog_id:
# If catalog_id is just an ID, fetch the Catalog object
retriever.catalog = Catalog.query.get(retriever.catalog_id)
else:
retriever.catalog = None
# Create form instance with the retriever
form = EditRetrieverForm(request.form, obj=retriever)
configuration_config = RETRIEVER_TYPES[retriever.type]["configuration"]
form.add_dynamic_fields("configuration", configuration_config, retriever.configuration)
if request.method == 'POST':
current_app.logger.debug(f'Received POST request with {request.form}')
if form.validate_on_submit():
# Update basic fields
form.populate_obj(retriever)
retriever.configuration = form.get_dynamic_data('configuration')
# Update catalog relationship
retriever.catalog_id = form.catalog.data.id if form.catalog.data else None
# Update logging information
update_logging_information(retriever, dt.now(tz.utc))
# Save changes to database
try:
db.session.add(retriever)
db.session.commit()
flash('Retriever updated successfully!', 'success')
current_app.logger.info(f'Retriever {retriever.id} updated successfully')
except SQLAlchemyError as e:
db.session.rollback()
flash(f'Failed to update retriever. Error: {str(e)}', 'danger')
current_app.logger.error(f'Failed to update retriever {retriever_id}. Error: {str(e)}')
return render_template('document/edit_retriever.html', form=form, retriever_id=retriever_id)
return redirect(prefixed_url_for('document_bp.retrievers'))
else:
form_validation_failed(request, form)
current_app.logger.debug(f"Rendering Template for {retriever_id}")
return render_template('document/edit_retriever.html', form=form, retriever_id=retriever_id)
@document_bp.route('/retrievers', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def retrievers():
page = request.args.get('page', 1, type=int)
per_page = request.args.get('per_page', 10, type=int)
query = Retriever.query.order_by(Retriever.id)
pagination = query.paginate(page=page, per_page=per_page)
the_retrievers = pagination.items
# prepare table data
rows = prepare_table_for_macro(the_retrievers,
[('id', ''), ('name', ''), ('type', ''), ('catalog_id', '')])
# Render the catalogs in a template
return render_template('document/retrievers.html', rows=rows, pagination=pagination)
@document_bp.route('/handle_retriever_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def handle_retriever_selection():
retriever_identification = request.form.get('selected_row')
retriever_id = ast.literal_eval(retriever_identification).get('value')
action = request.form['action']
if action == 'edit_retriever':
return redirect(prefixed_url_for('document_bp.edit_retriever', retriever_id=retriever_id))
return redirect(prefixed_url_for('document_bp.retrievers'))
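Note on the selection handlers: selected_row arrives as the string representation of a table-cell dict produced by prepare_table_for_macro, which is why ast.literal_eval is used before reading 'value'. A minimal sketch of the round trip, with made-up data:

import ast

# Illustrative payload posted by the table macro for the chosen row's id cell.
selected_row = "{'value': 7, 'class': '', 'type': 'text'}"
retriever_id = ast.literal_eval(selected_row).get('value')   # -> 7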
@document_bp.route('/add_document', methods=['GET', 'POST']) @document_bp.route('/add_document', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin') @roles_accepted('Super User', 'Tenant Admin')
def add_document(): def add_document():
form = AddDocumentForm() form = AddDocumentForm(request.form)
catalog_id = session.get('catalog_id', None)
if catalog_id is None:
flash('You need to set a Session Catalog before adding Documents or URLs')
return redirect(prefixed_url_for('document_bp.catalogs'))
catalog = Catalog.query.get_or_404(catalog_id)
if catalog.configuration and len(catalog.configuration) > 0:
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
form.add_dynamic_fields(config, catalog.configuration[config])
if form.validate_on_submit(): if form.validate_on_submit():
try: try:
current_app.logger.info(f'Adding Document for {catalog_id}')
tenant_id = session['tenant']['id'] tenant_id = session['tenant']['id']
file = form.file.data file = form.file.data
filename = secure_filename(file.filename) filename = secure_filename(file.filename)
@@ -66,13 +319,19 @@ def add_document():
validate_file_type(extension) validate_file_type(extension)
current_app.logger.debug(f'Language on form: {form.language.data}') catalog_properties = {}
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
catalog_properties[config] = form.get_dynamic_data(config)
api_input = { api_input = {
'catalog_id': catalog_id,
'name': form.name.data, 'name': form.name.data,
'language': form.language.data, 'language': form.language.data,
'user_context': form.user_context.data, 'user_context': form.user_context.data,
'valid_from': form.valid_from.data, 'valid_from': form.valid_from.data,
'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None, 'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None,
'catalog_properties': catalog_properties,
} }
current_app.logger.debug(f'Creating document stack with input {api_input}') current_app.logger.debug(f'Creating document stack with input {api_input}')
@@ -95,7 +354,17 @@ def add_document():
@document_bp.route('/add_url', methods=['GET', 'POST']) @document_bp.route('/add_url', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin') @roles_accepted('Super User', 'Tenant Admin')
def add_url(): def add_url():
form = AddURLForm() form = AddURLForm(request.form)
catalog_id = session.get('catalog_id', None)
if catalog_id is None:
flash('You need to set a Session Catalog before adding Documents or URLs')
return redirect(prefixed_url_for('document_bp.catalogs'))
catalog = Catalog.query.get_or_404(catalog_id)
if catalog.configuration and len(catalog.configuration) > 0:
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
form.add_dynamic_fields(config, catalog.configuration[config])
if form.validate_on_submit(): if form.validate_on_submit():
try: try:
@@ -104,13 +373,20 @@ def add_url():
file_content, filename, extension = process_url(url, tenant_id) file_content, filename, extension = process_url(url, tenant_id)
catalog_properties = {}
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
catalog_properties[config] = form.get_dynamic_data(config)
api_input = { api_input = {
'catalog_id': catalog_id,
'name': form.name.data or filename, 'name': form.name.data or filename,
'url': url, 'url': url,
'language': form.language.data, 'language': form.language.data,
'user_context': form.user_context.data, 'user_context': form.user_context.data,
'valid_from': form.valid_from.data, 'valid_from': form.valid_from.data,
'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None, 'user_metadata': json.loads(form.user_metadata.data) if form.user_metadata.data else None,
'catalog_properties': catalog_properties,
} }
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id) new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
@@ -131,62 +407,26 @@ def add_url():
return render_template('document/add_url.html', form=form) return render_template('document/add_url.html', form=form)
@document_bp.route('/add_urls', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_urls():
form = AddURLsForm()
if form.validate_on_submit():
try:
tenant_id = session['tenant']['id']
urls = form.urls.data.split('\n')
urls = [url.strip() for url in urls if url.strip()]
api_input = {
'name': form.name.data,
'language': form.language.data,
'user_context': form.user_context.data,
'valid_from': form.valid_from.data
}
results = process_multiple_urls(urls, tenant_id, api_input)
for result in results:
if result['status'] == 'success':
flash(
f"Processed URL: {result['url']} - Document ID: {result['document_id']}, Version ID: {result['document_version_id']}",
'success')
else:
flash(f"Error processing URL: {result['url']} - {result['message']}", 'error')
return redirect(prefixed_url_for('document_bp.documents'))
except Exception as e:
current_app.logger.error(f'Error adding multiple URLs: {str(e)}')
flash('An error occurred while adding the URLs.', 'error')
return render_template('document/add_urls.html', form=form)
@document_bp.route('/documents', methods=['GET', 'POST']) @document_bp.route('/documents', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin') @roles_accepted('Super User', 'Tenant Admin')
def documents(): def documents():
page = request.args.get('page', 1, type=int) view = DocumentListView(Document, 'document/documents.html', per_page=10)
per_page = request.args.get('per_page', 10, type=int) return view.get()
pagination = get_documents_list(page, per_page)
docs = pagination.items
rows = prepare_table_for_macro(docs, [('id', ''), ('name', ''), ('valid_from', ''), ('valid_to', '')])
return render_template('document/documents.html', rows=rows, pagination=pagination)
@document_bp.route('/handle_document_selection', methods=['POST']) @document_bp.route('/handle_document_selection', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin') @roles_accepted('Super User', 'Tenant Admin')
def handle_document_selection(): def handle_document_selection():
document_identification = request.form['selected_row'] document_identification = request.form['selected_row']
doc_id = ast.literal_eval(document_identification).get('value') if isinstance(document_identification, int) or document_identification.isdigit():
doc_id = int(document_identification)
else:
# If it's not an integer, assume it's a string representation of a dictionary
try:
doc_id = ast.literal_eval(document_identification).get('value')
except (ValueError, AttributeError):
flash('Invalid document selection.', 'error')
return redirect(prefixed_url_for('document_bp.documents'))
action = request.form['action'] action = request.form['action']
@@ -208,9 +448,25 @@ def handle_document_selection():
@document_bp.route('/edit_document/<int:document_id>', methods=['GET', 'POST']) @document_bp.route('/edit_document/<int:document_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin') @roles_accepted('Super User', 'Tenant Admin')
def edit_document_view(document_id): def edit_document_view(document_id):
doc = Document.query.get_or_404(document_id) # Use an alias for the Catalog to avoid column name conflicts
CatalogAlias = aliased(Catalog)
# Query for the document and its catalog
result = db.session.query(Document, CatalogAlias.name.label('catalog_name')) \
.join(CatalogAlias, Document.catalog_id == CatalogAlias.id) \
.filter(Document.id == document_id) \
.first_or_404()
doc, catalog_name = result
form = EditDocumentForm(obj=doc) form = EditDocumentForm(obj=doc)
if request.method == 'GET':
# Populate form with current values
form.name.data = doc.name
form.valid_from.data = doc.valid_from
form.valid_to.data = doc.valid_to
if form.validate_on_submit(): if form.validate_on_submit():
updated_doc, error = edit_document( updated_doc, error = edit_document(
document_id, document_id,
@@ -226,19 +482,36 @@ def edit_document_view(document_id):
else: else:
form_validation_failed(request, form) form_validation_failed(request, form)
return render_template('document/edit_document.html', form=form, document_id=document_id) return render_template('document/edit_document.html', form=form, document_id=document_id, catalog_name=catalog_name)
@document_bp.route('/edit_document_version/<int:document_version_id>', methods=['GET', 'POST']) @document_bp.route('/edit_document_version/<int:document_version_id>', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin') @roles_accepted('Super User', 'Tenant Admin')
def edit_document_version_view(document_version_id): def edit_document_version_view(document_version_id):
doc_vers = DocumentVersion.query.get_or_404(document_version_id) doc_vers = DocumentVersion.query.get_or_404(document_version_id)
form = EditDocumentVersionForm(obj=doc_vers) form = EditDocumentVersionForm(request.form, obj=doc_vers)
catalog_id = session.get('catalog_id', None)
if catalog_id is None:
flash('You need to set a Session Catalog before adding Documents or URLs')
return redirect(prefixed_url_for('document_bp.catalogs'))
catalog = Catalog.query.get_or_404(catalog_id)
if catalog.configuration and len(catalog.configuration) > 0:
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
form.add_dynamic_fields(config, catalog.configuration[config], doc_vers.catalog_properties[config])
if form.validate_on_submit(): if form.validate_on_submit():
catalog_properties = {}
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:
catalog_properties[config] = form.get_dynamic_data(config)
updated_version, error = edit_document_version( updated_version, error = edit_document_version(
document_version_id, document_version_id,
form.user_context.data form.user_context.data,
catalog_properties,
) )
if updated_version: if updated_version:
flash(f'Document Version {updated_version.id} updated successfully', 'success') flash(f'Document Version {updated_version.id} updated successfully', 'success')
@@ -268,8 +541,8 @@ def document_versions(document_id):
pagination = query.paginate(page=page, per_page=per_page, error_out=False) pagination = query.paginate(page=page, per_page=per_page, error_out=False)
doc_langs = pagination.items doc_langs = pagination.items
rows = prepare_table_for_macro(doc_langs, [('id', ''), ('url', ''), ('file_location', ''), rows = prepare_table_for_macro(doc_langs, [('id', ''), ('url', ''),
('file_name', ''), ('file_type', ''), ('object_name', ''), ('file_type', ''),
('processing', ''), ('processing_started_at', ''), ('processing', ''), ('processing_started_at', ''),
('processing_finished_at', ''), ('processing_error', '')]) ('processing_finished_at', ''), ('processing_error', '')])
@@ -280,7 +553,15 @@ def document_versions(document_id):
@roles_accepted('Super User', 'Tenant Admin') @roles_accepted('Super User', 'Tenant Admin')
def handle_document_version_selection(): def handle_document_version_selection():
document_version_identification = request.form['selected_row'] document_version_identification = request.form['selected_row']
doc_vers_id = ast.literal_eval(document_version_identification).get('value') if isinstance(document_version_identification, int) or document_version_identification.isdigit():
doc_vers_id = int(document_version_identification)
else:
# If it's not an integer, assume it's a string representation of a dictionary
try:
doc_vers_id = ast.literal_eval(document_version_identification).get('value')
except (ValueError, AttributeError):
flash('Invalid document version selection.', 'error')
return redirect(prefixed_url_for('document_bp.document_versions_list'))
action = request.form['action'] action = request.form['action']
@@ -332,7 +613,7 @@ def refresh_all_documents():
def refresh_document_view(document_id): def refresh_document_view(document_id):
new_version, result = refresh_document(document_id) new_version, result = refresh_document(document_id, session['tenant']['id'])
if new_version: if new_version:
flash(f'Document refreshed. New version: {new_version.id}. Task ID: {result}', 'success') flash(f'Document refreshed. New version: {new_version.id}. Task ID: {result}', 'success')
else: else:
@@ -349,10 +630,9 @@ def re_embed_latest_versions():
def process_version(version_id): def process_version(version_id):
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[ task = current_celery.send_task('create_embeddings',
session['tenant']['id'], args=[session['tenant']['id'], version_id,],
version_id, queue='embeddings')
])
current_app.logger.info(f'Embedding creation retriggered by user {current_user.id}, {current_user.email} ' current_app.logger.info(f'Embedding creation retriggered by user {current_user.id}, {current_user.email} '
f'for tenant {session["tenant"]["id"]}, ' f'for tenant {session["tenant"]["id"]}, '
f'Document Version {version_id}. ' f'Document Version {version_id}. '
@@ -396,49 +676,3 @@ def fetch_html(url):
response.raise_for_status() # Will raise an exception for bad requests response.raise_for_status() # Will raise an exception for bad requests
return response.content return response.content
def prepare_document_data(docs):
rows = []
for doc in docs:
doc_row = [{'value': doc.name, 'class': '', 'type': 'text'},
{'value': doc.created_at.strftime("%Y-%m-%d %H:%M:%S"), 'class': '', 'type': 'text'}]
# Document basic details
if doc.valid_from:
doc_row.append({'value': doc.valid_from.strftime("%Y-%m-%d"), 'class': '', 'type': 'text'})
else:
doc_row.append({'value': '', 'class': '', 'type': 'text'})
# Nested languages and versions
languages_rows = []
for lang in doc.languages:
lang_row = [{'value': lang.language, 'class': '', 'type': 'text'}]
# Latest version details if available (should be available ;-) )
if lang.latest_version:
lang_row.append({'value': lang.latest_version.created_at.strftime("%Y-%m-%d %H:%M:%S"),
'class': '', 'type': 'text'})
if lang.latest_version.url:
lang_row.append({'value': lang.latest_version.url,
'class': '', 'type': 'link', 'href': lang.latest_version.url})
else:
lang_row.append({'value': '', 'class': '', 'type': 'text'})
if lang.latest_version.file_name:
lang_row.append({'value': lang.latest_version.file_name, 'class': '', 'type': 'text'})
else:
lang_row.append({'value': '', 'class': '', 'type': 'text'})
if lang.latest_version.file_type:
lang_row.append({'value': lang.latest_version.file_type, 'class': '', 'type': 'text'})
else:
lang_row.append({'value': '', 'class': '', 'type': 'text'})
# Include other details as necessary
languages_rows.append(lang_row)
doc_row.append({'is_group': True, 'colspan': '5',
'headers': ['Language', 'Latest Version', 'URL', 'File Name', 'Type'],
'sub_rows': languages_rows})
rows.append(doc_row)
return rows
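For the add_document/add_url flows above, catalog_properties is simply one get_dynamic_data() result per block named in the catalog type's document_version_configurations. Assuming a single hypothetical 'tagging_fields' block, the payload handed to create_document_stack would be shaped roughly like this:

# Illustrative shape only; keys depend on CATALOG_TYPES[catalog.type]['document_version_configurations'].
api_input = {
    'catalog_id': 3,
    'name': 'Pricing sheet',
    'language': 'en',
    'user_context': None,
    'valid_from': None,
    'user_metadata': None,
    'catalog_properties': {
        'tagging_fields': {'region': 'EMEA', 'product_line': 'Core'},   # one dict per dynamic collection
    },
}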

View File

@@ -0,0 +1,216 @@
from flask_wtf import FlaskForm
from wtforms import IntegerField, FloatField, BooleanField, StringField, TextAreaField, validators, ValidationError
from flask import current_app
import json
from wtforms.fields.choices import SelectField
from wtforms.fields.datetime import DateField
class DynamicFormBase(FlaskForm):
def __init__(self, formdata=None, *args, **kwargs):
super(DynamicFormBase, self).__init__(*args, **kwargs)
# Maps collection names to lists of field names
self.dynamic_fields = {}
# Store formdata for later use
self.formdata = formdata
def _create_field_validators(self, field_def):
"""Create validators based on field definition"""
validators_list = []
# Required validator
if field_def.get('required', False):
validators_list.append(validators.InputRequired())
else:
validators_list.append(validators.Optional())
# Type-specific validators
field_type = field_def.get('type')
if field_type in ['integer', 'float']:
min_value = field_def.get('min_value')
max_value = field_def.get('max_value')
if min_value is not None or max_value is not None:
validators_list.append(
validators.NumberRange(
min=min_value if min_value is not None else -float('inf'),
max=max_value if max_value is not None else float('inf'),
message=f"Value must be between {min_value or '-∞'} and {max_value or ''}"
)
)
return validators_list
def add_dynamic_fields(self, collection_name, config, initial_data=None):
"""Add dynamic fields to the form based on the configuration."""
self.dynamic_fields[collection_name] = []
for field_name, field_def in config.items():
current_app.logger.debug(f"{field_name}: {field_def}")
# Prefix the field name with the collection name
full_field_name = f"{collection_name}_{field_name}"
label = field_def.get('name', field_name)
field_type = field_def.get('type')
description = field_def.get('description', '')
default = field_def.get('default')
# Determine standard validators
field_validators = self._create_field_validators(field_def)
# Handle special case for tagging_fields
if field_type == 'tagging_fields':
field_class = TextAreaField
field_validators.append(validate_tagging_fields)
extra_classes = 'json-editor'
field_kwargs = {}
elif field_type == 'enum':
field_class = SelectField
allowed_values = field_def.get('allowed_values', [])
choices = [(str(val), str(val)) for val in allowed_values]
extra_classes = ''
field_kwargs = {'choices': choices}
else:
extra_classes = ''
field_class = {
'integer': IntegerField,
'float': FloatField,
'boolean': BooleanField,
'string': StringField,
'date': DateField,
}.get(field_type, StringField)
field_kwargs = {}
# Prepare field data
field_data = None
if initial_data and field_name in initial_data:
field_data = initial_data[field_name]
if field_type == 'tagging_fields' and isinstance(field_data, dict):
try:
field_data = json.dumps(field_data, indent=2)
except (TypeError, ValueError) as e:
current_app.logger.error(f"Error converting initial data to JSON: {e}")
field_data = "{}"
elif default is not None:
field_data = default
# Create render_kw with classes and any other HTML attributes
render_kw = {'class': extra_classes} if extra_classes else {}
if description:
render_kw['title'] = description # For tooltip
render_kw['data-bs-toggle'] = 'tooltip'
render_kw['data-bs-placement'] = 'right'
# Create the field
field_kwargs.update({
'label': label,
'description': description,
'validators': field_validators,
'default': field_data,
'render_kw': render_kw
})
unbound_field = field_class(**field_kwargs)
# Bind the field to the form
bound_field = unbound_field.bind(form=self, name=full_field_name)
# Process the field with formdata
if self.formdata and full_field_name in self.formdata:
bound_field.process(self.formdata)
else:
bound_field.process(formdata=None, data=field_data) # Use prepared field_data
# Add the field to the form
setattr(self, full_field_name, bound_field)
self._fields[full_field_name] = bound_field
self.dynamic_fields[collection_name].append(full_field_name)
def get_static_fields(self):
"""Return a list of static field instances."""
# Get names of dynamic fields
dynamic_field_names = set()
for field_list in self.dynamic_fields.values():
dynamic_field_names.update(field_list)
# Return all fields that are not dynamic
return [field for name, field in self._fields.items() if name not in dynamic_field_names]
def get_dynamic_fields(self):
"""Return a dictionary of dynamic fields per collection."""
result = {}
for collection_name, field_names in self.dynamic_fields.items():
result[collection_name] = [getattr(self, name) for name in field_names]
return result
def get_dynamic_data(self, collection_name):
"""Retrieve the data from dynamic fields of a specific collection."""
data = {}
current_app.logger.debug(f"{collection_name} in {self.dynamic_fields}?")
if collection_name not in self.dynamic_fields:
return data
prefix_length = len(collection_name) + 1 # +1 for the underscore
for full_field_name in self.dynamic_fields[collection_name]:
current_app.logger.debug(f"{full_field_name}: {full_field_name}")
original_field_name = full_field_name[prefix_length:]
current_app.logger.debug(f"{original_field_name}: {original_field_name}")
field = getattr(self, full_field_name)
current_app.logger.debug(f"{field}: {field}")
# Parse JSON for tagging_fields type
if isinstance(field, TextAreaField) and field.data:
try:
data[original_field_name] = json.loads(field.data)
except json.JSONDecodeError:
# Validation should catch this, but just in case
data[original_field_name] = field.data
else:
data[original_field_name] = field.data
return data
def validate_tagging_fields(form, field):
"""Validate the tagging fields structure"""
if not field.data:
return
try:
# Parse JSON data
fields_data = json.loads(field.data)
# Validate it's a dictionary
if not isinstance(fields_data, dict):
raise ValidationError("Tagging fields must be a dictionary")
# Validate each field definition
for field_name, field_def in fields_data.items():
if not isinstance(field_def, dict):
raise ValidationError(f"Field definition for {field_name} must be a dictionary")
# Check required properties
if 'type' not in field_def:
raise ValidationError(f"Field {field_name} missing required 'type' property")
# Validate type
if field_def['type'] not in ['string', 'integer', 'float', 'date', 'enum']:
raise ValidationError(f"Field {field_name} has invalid type: {field_def['type']}")
# Validate enum fields have allowed_values
if field_def['type'] == 'enum':
if 'allowed_values' not in field_def:
raise ValidationError(f"Enum field {field_name} missing required 'allowed_values' list")
if not isinstance(field_def['allowed_values'], list):
raise ValidationError(f"Field {field_name} allowed_values must be a list")
# Validate numeric fields
if field_def['type'] in ['integer', 'float']:
if 'min_value' in field_def and 'max_value' in field_def:
min_val = float(field_def['min_value'])
max_val = float(field_def['max_value'])
if min_val >= max_val:
raise ValidationError(f"Field {field_name} min_value must be less than max_value")
except json.JSONDecodeError:
raise ValidationError("Invalid JSON format")
except (TypeError, ValueError) as e:
raise ValidationError(f"Invalid field definition: {str(e)}")

View File

@@ -2,18 +2,14 @@ import uuid
from datetime import datetime as dt, timezone as tz from datetime import datetime as dt, timezone as tz
from flask import request, redirect, flash, render_template, Blueprint, session, current_app, jsonify from flask import request, redirect, flash, render_template, Blueprint, session, current_app, jsonify
from flask_security import hash_password, roles_required, roles_accepted, current_user from flask_security import hash_password, roles_required, roles_accepted, current_user
from itsdangerous import URLSafeTimedSerializer
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import or_ from sqlalchemy import or_, desc
import ast import ast
from common.models.entitlements import License, LicenseTier, LicenseUsage, BusinessEventLog from common.models.entitlements import License, LicenseTier, LicenseUsage, BusinessEventLog
from common.extensions import db, security, minio_client, simple_encryption from common.extensions import db, security, minio_client, simple_encryption
from common.utils.security_utils import send_confirmation_email, send_reset_email
from .entitlements_forms import LicenseTierForm, LicenseForm from .entitlements_forms import LicenseTierForm, LicenseForm
from common.utils.database import Database
from common.utils.view_assistants import prepare_table_for_macro, form_validation_failed from common.utils.view_assistants import prepare_table_for_macro, form_validation_failed
from common.utils.simple_encryption import generate_api_key
from common.utils.nginx_utils import prefixed_url_for from common.utils.nginx_utils import prefixed_url_for
entitlements_bp = Blueprint('entitlements_bp', __name__, url_prefix='/entitlements') entitlements_bp = Blueprint('entitlements_bp', __name__, url_prefix='/entitlements')
@@ -174,14 +170,14 @@ def create_license(license_tier_id):
db.session.add(new_license) db.session.add(new_license)
db.session.commit() db.session.commit()
flash('License created successfully', 'success') flash('License created successfully', 'success')
return redirect(prefixed_url_for('entitlements_bp/edit_license', license_id=new_license.id)) return redirect(prefixed_url_for('entitlements_bp.edit_license', license_id=new_license.id))
except Exception as e: except Exception as e:
db.session.rollback() db.session.rollback()
flash(f'Error creating license: {str(e)}', 'error') flash(f'Error creating license: {str(e)}', 'error')
else: else:
form_validation_failed(request, form) form_validation_failed(request, form)
return render_template('entitlements/license.html', form=form) return render_template('entitlements/license.html', form=form, ext_disabled_fields=[])
@entitlements_bp.route('/license/<int:license_id>', methods=['GET', 'POST']) @entitlements_bp.route('/license/<int:license_id>', methods=['GET', 'POST'])
@@ -215,3 +211,25 @@ def edit_license(license_id):
return render_template('entitlements/license.html', form=form, license_tier_id=license_tier.id, return render_template('entitlements/license.html', form=form, license_tier_id=license_tier.id,
ext_disabled_fields=disabled_fields) ext_disabled_fields=disabled_fields)
@entitlements_bp.route('/view_usages')
@roles_accepted('Super User', 'Tenant Admin')
def view_usages():
page = request.args.get('page', 1, type=int)
per_page = request.args.get('per_page', 10, type=int)
tenant_id = session.get('tenant').get('id')
query = LicenseUsage.query.filter_by(tenant_id=tenant_id).order_by(desc(LicenseUsage.id))
pagination = query.paginate(page=page, per_page=per_page)
lus = pagination.items
# prepare table data
rows = prepare_table_for_macro(lus, [('id', ''), ('period_start_date', ''), ('period_end_date', ''),
('storage_mb_used', ''), ('embedding_mb_used', ''),
('interaction_total_tokens_used', '')])
# Render the users in a template
return render_template('entitlements/view_usages.html', rows=rows, pagination=pagination)

View File

@@ -93,17 +93,17 @@ def view_chat_session(chat_session_id):
# Fetch all related embeddings for the interactions in this session # Fetch all related embeddings for the interactions in this session
embedding_query = (db.session.query(InteractionEmbedding.interaction_id, embedding_query = (db.session.query(InteractionEmbedding.interaction_id,
DocumentVersion.url, DocumentVersion.url,
DocumentVersion.file_name) DocumentVersion.object_name)
.join(Embedding, InteractionEmbedding.embedding_id == Embedding.id) .join(Embedding, InteractionEmbedding.embedding_id == Embedding.id)
.join(DocumentVersion, Embedding.doc_vers_id == DocumentVersion.id) .join(DocumentVersion, Embedding.doc_vers_id == DocumentVersion.id)
.filter(InteractionEmbedding.interaction_id.in_([i.id for i in interactions]))) .filter(InteractionEmbedding.interaction_id.in_([i.id for i in interactions])))
# Create a dictionary to store embeddings for each interaction # Create a dictionary to store embeddings for each interaction
embeddings_dict = {} embeddings_dict = {}
for interaction_id, url, file_name in embedding_query: for interaction_id, url, object_name in embedding_query:
if interaction_id not in embeddings_dict: if interaction_id not in embeddings_dict:
embeddings_dict[interaction_id] = [] embeddings_dict[interaction_id] = []
embeddings_dict[interaction_id].append({'url': url, 'file_name': file_name}) embeddings_dict[interaction_id].append({'url': url, 'object_name': object_name})
return render_template('interaction/view_chat_session.html', return render_template('interaction/view_chat_session.html',
chat_session=chat_session, chat_session=chat_session,
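The embeddings_dict built above groups source references per interaction so the template can list sources under each answer; its resulting shape looks like this (values illustrative):

# {interaction_id: [{'url': ..., 'object_name': ...}, ...]}
embeddings_dict = {
    42: [
        {'url': 'https://example.com/pricing', 'object_name': None},
        {'url': None, 'object_name': 'tenant-3/pricing-sheet.pdf'},
    ],
}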

View File

@@ -48,22 +48,6 @@ def tenant():
new_tenant = Tenant() new_tenant = Tenant()
form.populate_obj(new_tenant) form.populate_obj(new_tenant)
# Handle Embedding Variables
new_tenant.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
new_tenant.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
if form.html_end_tags.data else []
new_tenant.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
if form.html_included_elements.data else []
new_tenant.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
if form.html_excluded_elements.data else []
new_tenant.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \
if form.html_excluded_classes.data else []
current_app.logger.debug(f'html_tags: {new_tenant.html_tags},'
f'html_end_tags: {new_tenant.html_end_tags},'
f'html_included_elements: {new_tenant.html_included_elements},'
f'html_excluded_elements: {new_tenant.html_excluded_elements}')
# Handle Timestamps # Handle Timestamps
timestamp = dt.now(tz.utc) timestamp = dt.now(tz.utc)
new_tenant.created_at = timestamp new_tenant.created_at = timestamp
@@ -105,30 +89,11 @@ def edit_tenant(tenant_id):
if request.method == 'GET': if request.method == 'GET':
# Populate the form with tenant data # Populate the form with tenant data
form.populate_obj(tenant) form.populate_obj(tenant)
if tenant.html_tags:
form.html_tags.data = ', '.join(tenant.html_tags)
if tenant.html_end_tags:
form.html_end_tags.data = ', '.join(tenant.html_end_tags)
if tenant.html_included_elements:
form.html_included_elements.data = ', '.join(tenant.html_included_elements)
if tenant.html_excluded_elements:
form.html_excluded_elements.data = ', '.join(tenant.html_excluded_elements)
if tenant.html_excluded_classes:
form.html_excluded_classes.data = ', '.join(tenant.html_excluded_classes)
if form.validate_on_submit(): if form.validate_on_submit():
current_app.logger.debug(f'Updating tenant {tenant_id}') current_app.logger.debug(f'Updating tenant {tenant_id}')
# Populate the tenant with form data # Populate the tenant with form data
form.populate_obj(tenant) form.populate_obj(tenant)
# Then handle the special fields manually
tenant.html_tags = [tag.strip() for tag in form.html_tags.data.split(',') if tag.strip()]
tenant.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',') if tag.strip()]
tenant.html_included_elements = [elem.strip() for elem in form.html_included_elements.data.split(',') if
elem.strip()]
tenant.html_excluded_elements = [elem.strip() for elem in form.html_excluded_elements.data.split(',') if
elem.strip()]
tenant.html_excluded_classes = [elem.strip() for elem in form.html_excluded_classes.data.split(',') if
elem.strip()]
db.session.commit() db.session.commit()
flash('Tenant updated successfully.', 'success') flash('Tenant updated successfully.', 'success')
@@ -266,10 +231,16 @@ def handle_tenant_selection():
tenant_identification = request.form['selected_row'] tenant_identification = request.form['selected_row']
tenant_id = ast.literal_eval(tenant_identification).get('value') tenant_id = ast.literal_eval(tenant_identification).get('value')
the_tenant = Tenant.query.get(tenant_id) the_tenant = Tenant.query.get(tenant_id)
# set tenant information in the session
session['tenant'] = the_tenant.to_dict() session['tenant'] = the_tenant.to_dict()
session['default_language'] = the_tenant.default_language session['default_language'] = the_tenant.default_language
session['embedding_model'] = the_tenant.embedding_model session['embedding_model'] = the_tenant.embedding_model
session['llm_model'] = the_tenant.llm_model session['llm_model'] = the_tenant.llm_model
# remove catalog-related items from the session
session.pop('catalog_id', None)
session.pop('catalog_name', None)
action = request.form['action'] action = request.form['action']
match action: match action:

44
eveai_beat/__init__.py Normal file
View File

@@ -0,0 +1,44 @@
import logging
import logging.config
from flask import Flask
import os
from common.utils.celery_utils import make_celery, init_celery
from config.logging_config import LOGGING
from config.config import get_config
def create_app(config_file=None):
app = Flask(__name__)
environment = os.getenv('FLASK_ENV', 'development')
match environment:
case 'development':
app.config.from_object(get_config('dev'))
case 'production':
app.config.from_object(get_config('prod'))
case _:
app.config.from_object(get_config('dev'))
logging.config.dictConfig(LOGGING)
register_extensions(app)
celery = make_celery(app.name, app.config)
init_celery(celery, app, is_beat=True)
from . import schedule
celery.conf.beat_schedule = schedule.beat_schedule
app.logger.info("EveAI Beat Scheduler Started Successfully")
app.logger.info("-------------------------------------------------------------------------------------------------")
return app, celery
def register_extensions(app):
pass
app, celery = create_app()
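Assuming this package layout (eveai_beat exposing app and celery at import time), the scheduler container would typically be launched with the standard Celery beat command, e.g. celery -A eveai_beat.celery beat --loglevel=info; the exact application path is an assumption based on the module shown here. The wiring can be sanity-checked from a shell:

from eveai_beat import app, celery

# schedule.py is attached in create_app(), so the hourly usage entry should be present.
assert 'update-tenant-usages-every-hour' in celery.conf.beat_schedule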

17
eveai_beat/schedule.py Normal file
View File

@@ -0,0 +1,17 @@
from celery.schedules import crontab
# Define the Celery beat schedule here
beat_schedule = {
'update-tenant-usages-every-hour': {
'task': 'update_usages',
'schedule': crontab(minute='0'), # Runs every hour
'args': (),
'options': {'queue': 'entitlements'}
},
# 'send-invoices-every-month': {
# 'task': 'send_invoices',
# 'schedule': crontab(day_of_month=1, hour=0, minute=0), # Runs on the 1st of every month
# 'args': ()
# },
# Add more schedules as needed
}
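The schedule references the task only by name and queue; a worker in the entitlements service must register a task under that exact name for the entry to resolve. A minimal, hypothetical sketch of such a registration (the real task lives in the eveai_entitlements worker code, which is not part of this diff):

from celery import shared_task

@shared_task(name='update_usages')
def update_usages():
    # Hypothetical body: recalculate per-tenant LicenseUsage rows for the current period.
    pass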

View File

@@ -109,14 +109,16 @@ def handle_message(data):
room = session.get('room')
# Offload actual processing of question
- task = current_celery.send_task('ask_question', queue='llm_interactions', args=[
- current_tenant_id,
- data['message'],
- data['language'],
- session['session_id'],
- data['timezone'],
- room
- ])
+ task = current_celery.send_task('ask_question',
+ queue='llm_interactions',
+ args=[
+ current_tenant_id,
+ data['message'],
+ data['language'],
+ session['session_id'],
+ data['timezone'],
+ room
+ ])
current_app.logger.debug(f'SocketIO: Message offloading for tenant {current_tenant_id}, '
f'Question: {task.id}')
response = {

View File

@@ -1,29 +1,22 @@
from datetime import datetime as dt, timezone as tz
- from flask import current_app, session
+ from flask import current_app
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
- from langchain.globals import set_debug
from sqlalchemy.exc import SQLAlchemyError
- from celery import states
- from celery.exceptions import Ignore
- import os
# OpenAI imports
- from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
- from langchain.chains.summarize import load_summarize_chain
- from langchain.text_splitter import CharacterTextSplitter
from langchain_core.exceptions import LangChainException
from common.utils.database import Database
- from common.models.document import DocumentVersion, EmbeddingMistral, EmbeddingSmallOpenAI, Embedding
+ from common.models.document import Embedding
from common.models.user import Tenant
from common.models.interaction import ChatSession, Interaction, InteractionEmbedding
from common.extensions import db
from common.utils.celery_utils import current_celery
from common.utils.model_utils import select_model_variables, create_language_template, replace_variable_in_template
- from common.langchain.eveai_retriever import EveAIRetriever
+ from common.langchain.retrievers.eveai_default_rag_retriever import EveAIDefaultRagRetriever
- from common.langchain.eveai_history_retriever import EveAIHistoryRetriever
+ from common.langchain.retrievers.eveai_history_retriever import EveAIHistoryRetriever
from common.utils.business_event import BusinessEvent
from common.utils.business_event_context import current_event
@@ -36,7 +29,7 @@ def ping():
def detail_question(question, language, model_variables, session_id):
current_app.logger.debug(f'Detail question: {question}')
- current_app.logger.debug(f'model_varialbes: {model_variables}')
+ current_app.logger.debug(f'model_variables: {model_variables}')
current_app.logger.debug(f'session_id: {session_id}')
retriever = EveAIHistoryRetriever(model_variables=model_variables, session_id=session_id)
llm = model_variables['llm']
@@ -93,12 +86,6 @@ def ask_question(tenant_id, question, language, session_id, user_timezone, room)
current_app.logger.error(f'ask_question: Error initializing chat session in database: {e}')
raise
- if tenant.rag_tuning:
- current_app.rag_tuning_logger.debug(f'Received question for tenant {tenant_id}:\n{question}. Processing...')
- current_app.rag_tuning_logger.debug(f'Tenant Information: \n{tenant.to_dict()}')
- current_app.rag_tuning_logger.debug(f'===================================================================')
- current_app.rag_tuning_logger.debug(f'===================================================================')
with current_event.create_span("RAG Answer"):
result, interaction = answer_using_tenant_rag(question, language, tenant, chat_session)
result['algorithm'] = current_app.config['INTERACTION_ALGORITHMS']['RAG_TENANT']['name']
@@ -138,22 +125,21 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
with current_event.create_span("Detail Question"): with current_event.create_span("Detail Question"):
detailed_question = detail_question(question, language, model_variables, chat_session.session_id) detailed_question = detail_question(question, language, model_variables, chat_session.session_id)
current_app.logger.debug(f'Original question:\n {question}\n\nDetailed question: {detailed_question}') if model_variables['rag_tuning']:
if tenant.rag_tuning:
current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.') current_app.rag_tuning_logger.debug(f'Detailed Question for tenant {tenant.id}:\n{question}.')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------') current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
new_interaction.detailed_question = detailed_question new_interaction.detailed_question = detailed_question
new_interaction.detailed_question_at = dt.now(tz.utc) new_interaction.detailed_question_at = dt.now(tz.utc)
with current_event.create_span("Generate Answer using RAG"): with current_event.create_span("Generate Answer using RAG"):
retriever = EveAIRetriever(model_variables, tenant_info) retriever = EveAIDefaultRagRetriever(model_variables, tenant_info)
llm = model_variables['llm'] llm = model_variables['llm']
template = model_variables['rag_template'] template = model_variables['rag_template']
language_template = create_language_template(template, language) language_template = create_language_template(template, language)
full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context']) full_template = replace_variable_in_template(language_template, "{tenant_context}", model_variables['rag_context'])
rag_prompt = ChatPromptTemplate.from_template(full_template) rag_prompt = ChatPromptTemplate.from_template(full_template)
setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()}) setup_and_retrieval = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
if tenant.rag_tuning: if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.') current_app.rag_tuning_logger.debug(f'Full prompt for tenant {tenant.id}:\n{full_template}.')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------') current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
@@ -181,7 +167,7 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
current_app.logger.debug(f'ask_question: result answer: {result['answer']}')
current_app.logger.debug(f'ask_question: result citations: {result["citations"]}')
current_app.logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
- if tenant.rag_tuning:
+ if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'ask_question: result answer: {result['answer']}')
current_app.rag_tuning_logger.debug(f'ask_question: result citations: {result["citations"]}')
current_app.rag_tuning_logger.debug(f'ask_question: insufficient information: {result["insufficient_info"]}')
@@ -197,7 +183,7 @@ def answer_using_tenant_rag(question, language, tenant, chat_session):
)
existing_embedding_ids = [emb.id for emb in embeddings]
urls = list(set(emb.document_version.url for emb in embeddings))
- if tenant.rag_tuning:
+ if model_variables['rag_tuning']:
current_app.rag_tuning_logger.debug(f'Referenced documents for answer for tenant {tenant.id}:\n')
current_app.rag_tuning_logger.debug(f'{urls}')
current_app.rag_tuning_logger.debug(f'-------------------------------------------------------------------')
@@ -250,7 +236,7 @@ def answer_using_llm(question, language, tenant, chat_session):
new_interaction.detailed_question_at = dt.now(tz.utc)
with current_event.create_span("Detail Answer using LLM"):
- retriever = EveAIRetriever(model_variables, tenant_info)
+ retriever = EveAIDefaultRagRetriever(model_variables, tenant_info)
llm = model_variables['llm_no_rag']
template = model_variables['encyclopedia_template']
language_template = create_language_template(template, language)

View File

@@ -5,13 +5,14 @@ from datetime import datetime as dt, timezone as tz, datetime
from celery import states
from dateutil.relativedelta import relativedelta
from flask import current_app
- from sqlalchemy import or_, and_
+ from sqlalchemy import or_, and_, text
from sqlalchemy.exc import SQLAlchemyError
from common.extensions import db
from common.models.user import Tenant
from common.models.entitlements import BusinessEventLog, LicenseUsage, License
from common.utils.celery_utils import current_celery
- from common.utils.eveai_exceptions import EveAINoLicenseForTenant
+ from common.utils.eveai_exceptions import EveAINoLicenseForTenant, EveAIException
+ from common.utils.database import Database
# Healthcheck task # Healthcheck task
@@ -24,32 +25,54 @@ def ping():
def update_usages():
current_timestamp = dt.now(tz.utc)
tenant_ids = get_all_tenant_ids()
+ # List to collect all errors
+ error_list = []
for tenant_id in tenant_ids:
- tenant = Tenant.query.get(tenant_id)
- if tenant.storage_dirty:
- recalculate_storage_for_tenant(tenant)
- check_and_create_license_usage_for_tenant(tenant_id)
- logs = get_logs_for_processing(tenant_id, current_timestamp)
- if not logs:
- continue  # If no logs to be processed, continu to the next tenant
+ try:
+ Database(tenant_id).switch_schema()
+ check_and_create_license_usage_for_tenant(tenant_id)
+ tenant = Tenant.query.get(tenant_id)
+ if tenant.storage_dirty:
+ recalculate_storage_for_tenant(tenant)
+ logs = get_logs_for_processing(tenant_id, current_timestamp)
+ if not logs:
+ continue  # If no logs to be processed, continue to the next tenant
# Get the min and max timestamp from the logs
min_timestamp = min(log.timestamp for log in logs)
max_timestamp = max(log.timestamp for log in logs)
# Retrieve relevant LicenseUsage records
- license_usages = get_relevant_license_usages(db.session, tenant_id, min_timestamp, max_timestamp)
+ current_app.logger.debug(f"Searching relevant usages for tenant {tenant_id}")
+ license_usages = get_relevant_license_usages(db.session, tenant_id, min_timestamp, max_timestamp)
+ current_app.logger.debug(f"Found {license_usages}, end searching relevant usages for tenant {tenant_id}")
# Split logs based on LicenseUsage periods
- logs_by_usage = split_logs_by_license_usage(logs, license_usages)
+ current_app.logger.debug(f"Splitting usages for tenant {tenant_id}")
+ logs_by_usage = split_logs_by_license_usage(logs, license_usages)
+ current_app.logger.debug(f"Found {logs_by_usage}, end splitting logs for tenant {tenant_id}")
# Now you can process logs for each LicenseUsage
for license_usage_id, logs in logs_by_usage.items():
- process_logs_for_license_usage(tenant_id, license_usage_id, logs)
+ current_app.logger.debug(f"Processing logs for usage id {license_usage_id} for tenant {tenant_id}")
+ process_logs_for_license_usage(tenant_id, license_usage_id, logs)
+ current_app.logger.debug(f"Finished processing logs for tenant {tenant_id}")
+ except Exception as e:
+ error = f"Usage Calculation error for Tenant {tenant_id}: {e}"
+ error_list.append(error)
+ current_app.logger.error(error)
+ continue
+ if error_list:
+ raise Exception('\n'.join(error_list))
+ return "Update Usages task completed successfully"
def get_all_tenant_ids():
- tenant_ids = db.session.query(Tenant.tenant_id).all()
+ tenant_ids = db.session.query(Tenant.id).all()
return [tenant_id[0] for tenant_id in tenant_ids]  # Extract tenant_id from tuples
@@ -57,21 +80,21 @@ def check_and_create_license_usage_for_tenant(tenant_id):
current_date = dt.now(tz.utc).date()
license_usages = (db.session.query(LicenseUsage)
.filter_by(tenant_id=tenant_id)
- .filter_by(and_(LicenseUsage.period_start_date <= current_date,
+ .filter(and_(LicenseUsage.period_start_date <= current_date,
LicenseUsage.period_end_date >= current_date))
.all())
if not license_usages:
active_license = (db.session.query(License).filter_by(tenant_id=tenant_id)
- .filter_by(and_(License.start_date <= current_date,
+ .filter(and_(License.start_date <= current_date,
License.end_date >= current_date))
- .one())
+ .one_or_none())
if not active_license:
current_app.logger.error(f"No License defined for {tenant_id}. "
f"Impossible to calculate license usage.")
raise EveAINoLicenseForTenant(message=f"No License defined for {tenant_id}. "
f"Impossible to calculate license usage.")
- start_date, end_date = calculate_valid_period(current_date, active_license.period_start_date)
+ start_date, end_date = calculate_valid_period(current_date, active_license.start_date)
new_license_usage = LicenseUsage(period_start_date=start_date,
period_end_date=end_date,
license_id=active_license.id,
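The switch from active_license.period_start_date to active_license.start_date reads naturally once calculate_valid_period is understood as: given today's date and the license's anchor date, return the billing period that contains today. That helper is not part of this diff, so the following is only a plausible sketch, assuming monthly periods anchored on the license start date (relativedelta is already imported in this module):

    from datetime import date
    from dateutil.relativedelta import relativedelta

    def calculate_valid_period(current_date: date, anchor_date: date):
        # Hypothetical implementation, for illustration only.
        # Whole months elapsed since the anchor date.
        months = (current_date.year - anchor_date.year) * 12 + (current_date.month - anchor_date.month)
        start = anchor_date + relativedelta(months=months)
        if start > current_date:
            start = anchor_date + relativedelta(months=months - 1)
        end = start + relativedelta(months=1) - relativedelta(days=1)
        return start, end

    # e.g. a license started on 2024-01-15, evaluated on 2024-10-10
    print(calculate_valid_period(date(2024, 10, 10), date(2024, 1, 15)))
    # (datetime.date(2024, 9, 15), datetime.date(2024, 10, 14))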
@@ -124,8 +147,8 @@ def get_relevant_license_usages(session, tenant_id, min_timestamp, max_timestamp
# Fetch LicenseUsage records where the log timestamps fall between period_start_date and period_end_date
return session.query(LicenseUsage).filter(
LicenseUsage.tenant_id == tenant_id,
- LicenseUsage.period_start_date <= max_timestamp,
+ LicenseUsage.period_start_date <= max_timestamp.date(),
- LicenseUsage.period_end_date >= min_timestamp
+ LicenseUsage.period_end_date >= min_timestamp.date()
).order_by(LicenseUsage.period_start_date).all()
@@ -136,7 +159,7 @@ def split_logs_by_license_usage(logs, license_usages):
for log in logs:
# Find the corresponding LicenseUsage for each log based on the timestamp
for license_usage in license_usages:
- if license_usage.period_start_date <= log.timestamp <= license_usage.period_end_date:
+ if license_usage.period_start_date <= log.timestamp.date() <= license_usage.period_end_date:
logs_by_usage[license_usage.id].append(log)
break
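The added .date() call is what makes this comparison legal at all: the log timestamp is a datetime while the LicenseUsage period bounds are plain dates, and in Python 3 an ordering comparison between the two raises TypeError instead of silently coercing. A small illustration with made-up values:

    from datetime import date, datetime

    ts = datetime(2024, 10, 11, 16, 33)
    period_start = date(2024, 10, 1)

    # ts >= period_start  ->  TypeError (can't compare a datetime to a date)
    print(ts.date() >= period_start)  # True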
@@ -181,7 +204,7 @@ def process_logs_for_license_usage(tenant_id, license_usage_id, logs):
log.license_usage_id = license_usage_id
# Update the LicenseUsage record with the accumulated values
- license_usage.embedding_mb += embedding_mb_used
+ license_usage.embedding_mb_used += embedding_mb_used
license_usage.embedding_prompt_tokens_used += embedding_prompt_tokens_used
license_usage.embedding_completion_tokens_used += embedding_completion_tokens_used
license_usage.embedding_total_tokens_used += embedding_total_tokens_used
@@ -189,27 +212,31 @@ def process_logs_for_license_usage(tenant_id, license_usage_id, logs):
license_usage.interaction_completion_tokens_used += interaction_completion_tokens_used
license_usage.interaction_total_tokens_used += interaction_total_tokens_used
+ current_app.logger.debug(f"Processed logs for license usage {license_usage.id}:\n{license_usage}")
# Commit the updates to the LicenseUsage and log records
try:
db.session.add(license_usage)
- db.session.add(logs)
+ for log in logs:
+ db.session.add(log)
db.session.commit()
except SQLAlchemyError as e:
db.session.rollback()
- current_app.logger.error(f"Error trying to update license usage and logs for tenant {tenant_id}. ")
+ current_app.logger.error(f"Error trying to update license usage and logs for tenant {tenant_id}: {e}")
raise e
def recalculate_storage_for_tenant(tenant):
# Perform a SUM operation to get the total file size from document_versions
- total_storage = db.session.execute(f"""
+ total_storage = db.session.execute(text(f"""
SELECT SUM(file_size)
- FROM {tenant.id}.document_versions
+ FROM document_version
- """).scalar()
+ """)).scalar()
+ current_app.logger.debug(f"Recalculating storage for tenant {tenant} - Total storage: {total_storage}")
# Update the LicenseUsage with the recalculated storage
license_usage = db.session.query(LicenseUsage).filter_by(tenant_id=tenant.id).first()
- license_usage.storage_mb = total_storage / (1024 * 1024)  # Convert bytes to MB
+ license_usage.storage_mb_used = total_storage
# Reset the dirty flag after recalculating
tenant.storage_dirty = False
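Dropping the / (1024 * 1024) conversion is consistent with file_size already being stored in megabytes rather than bytes; the (now commented-out) backfill migration later in this diff fills it with stat.size / 1048576, so summing file_size already yields MB and converting again would under-count. One edge case worth noting: SUM returns NULL when a tenant has no document versions, so a defensive variant of the same query might read (sketch only):

    total_storage = db.session.execute(text("""
        SELECT COALESCE(SUM(file_size), 0)
        FROM document_version
    """)).scalar()
    license_usage.storage_mb_used = total_storage  # file_size is assumed to already be in MB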

View File

@@ -27,10 +27,8 @@ class AudioProcessor(TranscriptionProcessor):
def _get_transcription(self):
file_data = minio_client.download_document_file(
self.tenant.id,
- self.document_version.doc_id,
+ self.document_version.bucket_name,
- self.document_version.language,
+ self.document_version.object_name,
- self.document_version.id,
- self.document_version.file_name
)
with current_event.create_span("Audio Compression"):

View File

@@ -15,6 +15,7 @@ class HTMLProcessor(Processor):
self.html_end_tags = model_variables['html_end_tags']
self.html_included_elements = model_variables['html_included_elements']
self.html_excluded_elements = model_variables['html_excluded_elements']
+ self.html_excluded_classes = model_variables['html_excluded_classes']
self.chunk_size = model_variables['processing_chunk_size']  # Adjust this based on your LLM's optimal input size
self.chunk_overlap = model_variables[
'processing_chunk_overlap']  # Adjust for context preservation between chunks
@@ -24,10 +25,8 @@ class HTMLProcessor(Processor):
try:
file_data = minio_client.download_document_file(
self.tenant.id,
- self.document_version.doc_id,
+ self.document_version.bucket_name,
- self.document_version.language,
+ self.document_version.object_name,
- self.document_version.id,
- self.document_version.file_name
)
html_content = file_data.decode('utf-8')
@@ -47,7 +46,7 @@ class HTMLProcessor(Processor):
self._log(f'Parsing HTML for tenant {self.tenant.id}')
soup = BeautifulSoup(html_content, 'html.parser')
extracted_html = ''
- excluded_classes = self._parse_excluded_classes(self.tenant.html_excluded_classes)
+ excluded_classes = self._parse_excluded_classes(self.html_excluded_classes)
if self.html_included_elements:
elements_to_parse = soup.find_all(self.html_included_elements)

View File

@@ -27,10 +27,8 @@ class PDFProcessor(Processor):
try:
file_data = minio_client.download_document_file(
self.tenant.id,
- self.document_version.doc_id,
+ self.document_version.bucket_name,
- self.document_version.language,
+ self.document_version.object_name,
- self.document_version.id,
- self.document_version.file_name
)
with current_event.create_span("PDF Extraction"):

View File

@@ -7,10 +7,8 @@ class SRTProcessor(TranscriptionProcessor):
def _get_transcription(self):
file_data = minio_client.download_document_file(
self.tenant.id,
- self.document_version.doc_id,
+ self.document_version.bucket_name,
- self.document_version.language,
+ self.document_version.object_name,
- self.document_version.id,
- self.document_version.file_name
)
srt_content = file_data.decode('utf-8')
return self._clean_srt(srt_content)

View File

@@ -44,3 +44,4 @@ def register_extensions(app):
app, celery = create_app()

View File

@@ -13,7 +13,7 @@ from langchain_core.runnables import RunnablePassthrough
from sqlalchemy.exc import SQLAlchemyError
from common.extensions import db, minio_client
- from common.models.document import DocumentVersion, Embedding
+ from common.models.document import DocumentVersion, Embedding, Document
from common.models.user import Tenant
from common.utils.celery_utils import current_celery
from common.utils.database import Database
@@ -36,34 +36,40 @@ def ping():
@current_celery.task(name='create_embeddings', queue='embeddings')
def create_embeddings(tenant_id, document_version_id):
- # Retrieve document version to process
- document_version = DocumentVersion.query.get(document_version_id)
- if document_version is None:
- raise Exception(f'Document version {document_version_id} not found')
+ try:
+ # Retrieve Tenant for which we are processing
+ tenant = Tenant.query.get(tenant_id)
+ if tenant is None:
+ raise Exception(f'Tenant {tenant_id} not found')
+ # Ensure we are working in the correct database schema
+ Database(tenant_id).switch_schema()
+ # Retrieve document version to process
+ document_version = DocumentVersion.query.get(document_version_id)
+ if document_version is None:
+ raise Exception(f'Document version {document_version_id} not found')
+ # Retrieve the Catalog ID
+ doc = Document.query.get_or_404(document_version.doc_id)
+ catalog_id = doc.catalog_id
+ # Select variables to work with depending on tenant and model
+ model_variables = select_model_variables(tenant, catalog_id=catalog_id)
+ current_app.logger.debug(f'Model variables: {model_variables}')
+ except Exception as e:
+ current_app.logger.error(f'Create Embeddings request received '
+ f'for non existing document version {document_version_id} '
+ f'for tenant {tenant_id}, '
+ f'error: {e}')
+ raise
# BusinessEvent creates a context, which is why we need to use it with a with block
with BusinessEvent('Create Embeddings', tenant_id,
document_version_id=document_version_id,
document_version_file_size=document_version.file_size):
current_app.logger.info(f'Creating embeddings for tenant {tenant_id} on document version {document_version_id}')
- try:
- # Retrieve Tenant for which we are processing
- tenant = Tenant.query.get(tenant_id)
- if tenant is None:
- raise Exception(f'Tenant {tenant_id} not found')
- # Ensure we are working in the correct database schema
- Database(tenant_id).switch_schema()
- # Select variables to work with depending on tenant and model
- model_variables = select_model_variables(tenant)
- current_app.logger.debug(f'Model variables: {model_variables}')
- except Exception as e:
- current_app.logger.error(f'Create Embeddings request received '
- f'for non existing document version {document_version_id} '
- f'for tenant {tenant_id}, '
- f'error: {e}')
- raise
try:
db.session.add(document_version)
@@ -204,7 +210,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
if len(chunks) > 1:
summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
- chunk_total_context = (f'Filename: {document_version.file_name}\n'
+ chunk_total_context = (f'Filename: {document_version.object_name}\n'
f'User Context:\n{document_version.user_context}\n\n'
f'User Metadata:\n{document_version.user_metadata}\n\n'
f'Title: {title}\n'
@@ -213,7 +219,7 @@ def enrich_chunks(tenant, model_variables, document_version, title, chunks):
f'System Metadata:\n{document_version.system_metadata}\n\n'
)
enriched_chunks = []
- initial_chunk = (f'Filename: {document_version.file_name}\n'
+ initial_chunk = (f'Filename: {document_version.object_name}\n'
f'User Context:\n{document_version.user_context}\n\n'
f'User Metadata:\n{document_version.user_metadata}\n\n'
f'Title: {title}\n'
@@ -304,13 +310,12 @@ def log_parsing_info(tenant, tags, included_elements, excluded_elements, exclude
def create_potential_chunks_for_markdown(tenant_id, document_version, input_file):
try:
current_app.logger.info(f'Creating potential chunks for tenant {tenant_id}')
+ markdown_on = document_version.object_name.rsplit('.', 1)[0] + '.md'
# Download the markdown file from MinIO
markdown_data = minio_client.download_document_file(tenant_id,
- document_version.doc_id,
+ document_version.bucket_name,
- document_version.language,
+ markdown_on,
- document_version.id,
- input_file
)
markdown = markdown_data.decode('utf-8')
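Deriving the markdown object name with rsplit('.', 1) replaces only the final extension of the stored object, leaving any earlier dots in the path untouched; for example (object name made up):

    object_name = '12/en/34/transcript.v2.srt'
    markdown_on = object_name.rsplit('.', 1)[0] + '.md'
    print(markdown_on)  # 12/en/34/transcript.v2.md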

View File

@@ -51,6 +51,10 @@ No additional configuration is needed; the plugin will automatically detect the
## Versions
### 1.1.1 - Add Reinitialisation functionality
### 1.1.0 - Add Catalog Functionality
### 1.0.x - Bugfixing Releases
### 1.0.0 - Initial Release

View File

@@ -3,7 +3,7 @@
* Plugin Name: EveAI Sync
* Plugin URI: https://askeveai.com/
* Description: Synchronizes WordPress content with EveAI API.
- * Version: 1.0.16
+ * Version: 1.1.1
* Author: Josako, Pieter Laroy
* Author URI: https://askeveai.com/about/
* License: GPL v2 or later
@@ -17,7 +17,7 @@ if (!defined('ABSPATH')) {
}
// Define plugin constants
- define('EVEAI_SYNC_VERSION', '1.0.0');
+ define('EVEAI_SYNC_VERSION', '1.1.1');
define('EVEAI_SYNC_PLUGIN_DIR', plugin_dir_path(__FILE__));
define('EVEAI_SYNC_PLUGIN_URL', plugin_dir_url(__FILE__));
@@ -50,6 +50,30 @@ function eveai_delete_post_meta($post_id) {
}
add_action('before_delete_post', 'eveai_delete_post_meta');
// Clean metadata from Wordpress site
function eveai_reinitialize_site() {
check_ajax_referer('eveai_reinitialize_site', 'nonce');
if (!current_user_can('manage_options')) {
wp_send_json_error('You do not have permission to perform this action.');
return;
}
global $wpdb;
// Remove all EveAI-related post meta
$wpdb->query("DELETE FROM $wpdb->postmeta WHERE meta_key LIKE '_eveai_%'");
// Remove all EveAI-related options
delete_option('eveai_last_sync_time');
delete_option('eveai_sync_status');
// Optionally, you might want to clear any custom tables if you have any
wp_send_json_success('Site reinitialized. All EveAI metadata has been removed.');
}
add_action('wp_ajax_eveai_reinitialize_site', 'eveai_reinitialize_site');
// Display sync info in post
function eveai_display_sync_info($post) {
$document_id = get_post_meta($post->ID, '_eveai_document_id', true);

View File

@@ -16,6 +16,7 @@ class EveAI_Admin {
register_setting('eveai_settings', 'eveai_excluded_categories');
register_setting('eveai_settings', 'eveai_access_token');
register_setting('eveai_settings', 'eveai_token_expiry');
register_setting('eveai_settings', 'eveai_catalog_id');
}
public function add_admin_menu() {
@@ -50,6 +51,10 @@ class EveAI_Admin {
<th scope="row">API Key</th> <th scope="row">API Key</th>
<td><input type="text" name="eveai_api_key" value="<?php echo esc_attr(get_option('eveai_api_key')); ?>" style="width: 100%;" /></td> <td><input type="text" name="eveai_api_key" value="<?php echo esc_attr(get_option('eveai_api_key')); ?>" style="width: 100%;" /></td>
</tr> </tr>
<tr valign="top">
<th scope="row">Catalog ID</th>
<td><input type="text" name="eveai_catalog_id" value="<?php echo esc_attr(get_option('eveai_catalog_id')); ?>" style="width: 100%;" /></td>
</tr>
<tr valign="top"> <tr valign="top">
<th scope="row">Default Language</th> <th scope="row">Default Language</th>
<td><input type="text" name="eveai_default_language" value="<?php echo esc_attr(get_option('eveai_default_language', 'en')); ?>" style="width: 100%;" /></td> <td><input type="text" name="eveai_default_language" value="<?php echo esc_attr(get_option('eveai_default_language', 'en')); ?>" style="width: 100%;" /></td>
@@ -71,6 +76,11 @@ class EveAI_Admin {
<?php wp_nonce_field('eveai_bulk_sync', 'eveai_bulk_sync_nonce'); ?>
<input type="submit" name="eveai_bulk_sync" class="button button-primary" value="Start Bulk Sync">
</form>
<h2>Reinitialize Site</h2>
<p>Click the button below to remove all EveAI metadata from your site. This will reset the sync status for all posts and pages.</p>
<button id="eveai-reinitialize" class="button button-secondary">Reinitialize Site</button>
<div id="eveai-sync-results" style="margin-top: 20px;"></div> <div id="eveai-sync-results" style="margin-top: 20px;"></div>
</div> </div>
<script> <script>
@@ -105,6 +115,29 @@ class EveAI_Admin {
});
}
});
$('#eveai-reinitialize').on('click', function(e) {
e.preventDefault();
if (confirm('Are you sure you want to reinitialize? This will remove all EveAI metadata from your site.')) {
$.ajax({
url: ajaxurl,
type: 'POST',
data: {
action: 'eveai_reinitialize_site',
nonce: '<?php echo wp_create_nonce('eveai_reinitialize_site'); ?>'
},
success: function(response) {
if (response.success) {
alert(response.data);
} else {
alert('Error: ' + response.data);
}
},
error: function() {
alert('An error occurred. Please try again.');
}
});
}
});
});
</script>
<?php

View File

@@ -13,6 +13,7 @@ class EveAI_API {
$this->api_key = get_option('eveai_api_key');
$this->access_token = get_option('eveai_access_token');
$this->token_expiry = get_option('eveai_token_expiry', 0);
$this->catalog_id = get_option('eveai_catalog_id');
}
private function ensure_valid_token() {
@@ -111,6 +112,7 @@ class EveAI_API {
}
public function add_url($data) {
$data['catalog_id'] = get_option('eveai_catalog_id'); // Include catalog_id
return $this->make_request('POST', '/api/v1/documents/add_url', $data);
}

View File

@@ -0,0 +1,24 @@
"""Set storage_dirty flag for all tenants
Revision ID: 02debd224316
Revises: 8fdd7f2965c1
Create Date: 2024-10-08 06:53:17.261709
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '02debd224316'
down_revision = '8fdd7f2965c1'
branch_labels = None
depends_on = None
def upgrade():
op.execute('UPDATE tenant SET storage_dirty = TRUE')
def downgrade():
pass

View File

@@ -0,0 +1,46 @@
"""LicenseUsage: correct mb fields to be floats iso integers
Revision ID: a678c84d5633
Revises: 02debd224316
Create Date: 2024-10-11 08:03:22.823327
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'a678c84d5633'
down_revision = '02debd224316'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('license_usage', schema=None) as batch_op:
batch_op.alter_column('storage_mb_used',
existing_type=sa.INTEGER(),
type_=sa.Float(),
existing_nullable=True)
batch_op.alter_column('embedding_mb_used',
existing_type=sa.INTEGER(),
type_=sa.Float(),
existing_nullable=True)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('license_usage', schema=None) as batch_op:
batch_op.alter_column('embedding_mb_used',
existing_type=sa.Float(),
type_=sa.INTEGER(),
existing_nullable=True)
batch_op.alter_column('storage_mb_used',
existing_type=sa.Float(),
type_=sa.INTEGER(),
existing_nullable=True)
# ### end Alembic commands ###

View File

@@ -1,5 +1,7 @@
import inspect
import logging
+ import sys
+ import os
from logging.config import fileConfig
from flask import current_app
@@ -19,8 +21,26 @@ config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
- fileConfig(config.config_file_name)
+ # fileConfig(config.config_file_name)
- logger = logging.getLogger('alembic.env')
+ # logger = logging.getLogger('alembic.env')
+ logging.basicConfig(
+ stream=sys.stdout,
+ level=logging.INFO,
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger()
+ # Reset handlers to avoid issues with Alembic overriding them
+ for handler in logger.handlers:
+ logger.removeHandler(handler)
+ # Add a stream handler to output logs to the console
+ console_handler = logging.StreamHandler(sys.stdout)
+ console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+ logger.addHandler(console_handler)
+ logger.setLevel(logging.INFO)
def get_engine():
@@ -124,31 +144,35 @@ def run_migrations_online():
with connectable.connect() as connection:
tenants = get_tenant_ids()
for tenant in tenants:
+ try:
+ os.environ['TENANT_ID'] = str(tenant)
logger.info(f"Migrating tenant: {tenant}")
# set search path on the connection, which ensures that
# PostgreSQL will emit all CREATE / ALTER / DROP statements
# in terms of this schema by default
connection.execute(text(f'SET search_path TO "{tenant}", public'))
# in SQLAlchemy v2+ the search path change needs to be committed
connection.commit()
# make use of non-supported SQLAlchemy attribute to ensure
# the dialect reflects tables in terms of the current tenant name
connection.dialect.default_schema_name = str(tenant)
context.configure(
connection=connection,
target_metadata=get_metadata(),
# literal_binds=True,
include_object=include_object,
)
with context.begin_transaction():
context.run_migrations()
# for checking migrate or upgrade is running
if getattr(config.cmd_opts, "autogenerate", False):
break
+ except Exception as e:
+ continue
if context.is_offline_mode():

View File

@@ -0,0 +1,56 @@
"""Adding Catalogs to Document Domain
Revision ID: 0f5932ff3051
Revises: 5a75fb6da7b8
Create Date: 2024-10-14 15:20:45.058329
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '0f5932ff3051'
down_revision = '5a75fb6da7b8'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('catalog',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=50), nullable=False),
sa.Column('description', sa.Text(), nullable=True),
sa.Column('html_tags', postgresql.ARRAY(sa.String(length=10)), nullable=True),
sa.Column('html_end_tags', postgresql.ARRAY(sa.String(length=10)), nullable=True),
sa.Column('html_included_elements', postgresql.ARRAY(sa.String(length=50)), nullable=True),
sa.Column('html_excluded_elements', postgresql.ARRAY(sa.String(length=50)), nullable=True),
sa.Column('html_excluded_classes', postgresql.ARRAY(sa.String(length=200)), nullable=True),
sa.Column('min_chunk_size', sa.Integer(), nullable=True),
sa.Column('max_chunk_size', sa.Integer(), nullable=True),
sa.Column('es_k', sa.Integer(), nullable=True),
sa.Column('es_similarity_threshold', sa.Float(), nullable=True),
sa.Column('chat_RAG_temperature', sa.Float(), nullable=True),
sa.Column('chat_no_RAG_temperature', sa.Float(), nullable=True),
sa.Column('embed_tuning', sa.Boolean(), nullable=True),
sa.Column('rag_tuning', sa.Boolean(), nullable=True),
sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
sa.Column('created_by', sa.Integer(), nullable=True),
sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
sa.Column('updated_by', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['created_by'], ['public.user.id'], ),
sa.ForeignKeyConstraint(['updated_by'], ['public.user.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.add_column('document', sa.Column('catalog_id', sa.Integer(), nullable=True))
op.create_foreign_key(None, 'document', 'catalog', ['catalog_id'], ['id'])
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('document', 'catalog_id')
op.drop_table('catalog')
# ### end Alembic commands ###

View File

@@ -0,0 +1,99 @@
"""Upgrading Existing documents to default catalog
Revision ID: 28984b05d396
Revises: 0f5932ff3051
Create Date: 2024-10-15 09:02:23.355660
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.orm import Session
import pgvector
from common.models.document import Catalog, Document
from common.models.user import Tenant
from flask import current_app
import logging
import os
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Create a console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
# Create a logging format
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
# Add the handler to the logger
logger.addHandler(console_handler)
logger.info("Starting revision upgrade 28984b05d396")
# revision identifiers, used by Alembic.
revision = '28984b05d396'
down_revision = '0f5932ff3051'
branch_labels = None
depends_on = None
def upgrade():
logger.info("Starting migration: Creating default catalog and updating documents.")
bind = op.get_bind()
session = Session(bind=bind)
try:
tenant_id = int(os.getenv('TENANT_ID'))
logger.info(f"In migration: tenant_id = {tenant_id}")
# Step 1: Create a new Default Catalog
logger.info("Creating default catalog")
default_catalog = Catalog(
name='Default Catalog',
description=None,
)
tenant = Tenant.query.get_or_404(tenant_id)
default_catalog.html_tags = tenant.html_tags
default_catalog.html_end_tags = tenant.html_end_tags
default_catalog.html_included_elements = tenant.html_included_elements
default_catalog.html_excluded_elements = tenant.html_excluded_elements
default_catalog.html_excluded_classes = tenant.html_excluded_classes
default_catalog.min_chunk_size = tenant.min_chunk_size
default_catalog.max_chunk_size = tenant.max_chunk_size
default_catalog.es_k = tenant.es_k
default_catalog.es_similarity_threshold = tenant.es_similarity_threshold
default_catalog.chat_RAG_temperature = tenant.chat_RAG_temperature
default_catalog.chat_no_RAG_temperature = tenant.chat_no_RAG_temperature
default_catalog.embed_tuning = tenant.embed_tuning
default_catalog.rag_tuning = tenant.rag_tuning
session.add(default_catalog)
session.commit()
new_catalog_id = default_catalog.id
logger.info(f"Default catalog created with ID: {new_catalog_id}")
# Step 2: Update all documents to use this new catalog
logger.info("Updating documents with the new catalog ID.")
documents = session.query(Document).all()
for document in documents:
document.catalog_id = new_catalog_id
session.commit()
logger.info(f"Updated {len(documents)} documents with new catalog ID.")
except Exception as e:
logger.error(f"An error occurred during migration: {e}")
session.rollback()
raise
finally:
session.close()
logger.info("Migration completed successfully.")
def downgrade():
pass

View File

@@ -26,55 +26,55 @@ def upgrade():
op.add_column('document_version', sa.Column('object_name', sa.String(length=200), nullable=True)) op.add_column('document_version', sa.Column('object_name', sa.String(length=200), nullable=True))
op.add_column('document_version', sa.Column('file_size', sa.Float(), nullable=True)) op.add_column('document_version', sa.Column('file_size', sa.Float(), nullable=True))
# ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ### # # ### Upgrade values for bucket_name, object_name and file_size to reflect minio reality ###
from common.models.document import DocumentVersion # from common.models.document import DocumentVersion
from common.extensions import minio_client # from common.extensions import minio_client
from minio.error import S3Error # from minio.error import S3Error
#
# Create a connection # # Create a connection
connection = op.get_bind() # connection = op.get_bind()
session = Session(bind=connection) # session = Session(bind=connection)
#
# Get the current schema name (which should be the tenant ID) # # Get the current schema name (which should be the tenant ID)
current_schema = connection.execute(text("SELECT current_schema()")).scalar() # current_schema = connection.execute(text("SELECT current_schema()")).scalar()
tenant_id = int(current_schema) # tenant_id = int(current_schema)
#
doc_versions = session.query(DocumentVersion).all() # doc_versions = session.query(DocumentVersion).all()
for doc_version in doc_versions: # for doc_version in doc_versions:
try: # try:
object_name = minio_client.generate_object_name(doc_version.doc_id, # object_name = minio_client.generate_object_name(doc_version.doc_id,
doc_version.language, # doc_version.language,
doc_version.id, # doc_version.id,
doc_version.file_name) # doc_version.file_name)
bucket_name = minio_client.generate_bucket_name(tenant_id) # bucket_name = minio_client.generate_bucket_name(tenant_id)
doc_version.object_name = object_name # doc_version.object_name = object_name
doc_version.bucket_name = bucket_name # doc_version.bucket_name = bucket_name
#
try: # try:
stat = minio_client.client.stat_object( # stat = minio_client.client.stat_object(
bucket_name=bucket_name, # bucket_name=bucket_name,
object_name=object_name # object_name=object_name
) # )
doc_version.file_size = stat.size / 1048576 # doc_version.file_size = stat.size / 1048576
current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}") # current_app.logger.info(f"Processed Upgrade for DocumentVersion {doc_version.id} for Tenant {tenant_id}")
except S3Error as e: # except S3Error as e:
if e.code == "NoSuchKey": # if e.code == "NoSuchKey":
current_app.logger.warning( # current_app.logger.warning(
f"Object {doc_version.file_location} not found in bucket {doc_version.bucket_name}. Skipping.") # f"Object {doc_version.object_name} not found in bucket {doc_version.bucket_name}. Skipping.")
continue # Move to the next item # continue # Move to the next item
else: # else:
raise e # Handle other types of S3 errors # raise e # Handle other types of S3 errors
except Exception as e: # except Exception as e:
session.rollback() # session.rollback()
current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for " # current_app.logger.error(f"Couldn't process upgrade for DocumentVersion {doc_version.id} for "
f"Tenant {tenant_id}. Error: {str(e)}") # f"Tenant {tenant_id}. Error: {str(e)}")
#
try: # try:
session.commit() # session.commit()
current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}") # current_app.logger.info(f"Successfully updated file sizes for tenant schema {current_schema}")
except Exception as e: # except Exception as e:
session.rollback() # session.rollback()
current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}") # current_app.logger.error(f"Error committing changes for tenant schema {current_schema}: {str(e)}")
# ### commands auto generated by Alembic - Remove old fields ### # ### commands auto generated by Alembic - Remove old fields ###
# op.drop_column('document_version', 'file_location') # op.drop_column('document_version', 'file_location')

View File

@@ -0,0 +1,29 @@
"""Add tuning to Retriever
Revision ID: 331f8100eb87
Revises: 6c5ca750e60c
Create Date: 2024-10-31 07:17:22.579376
"""
from alembic import op
import sqlalchemy as sa
import pgvector
# revision identifiers, used by Alembic.
revision = '331f8100eb87'
down_revision = '6c5ca750e60c'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('retriever', sa.Column('tuning', sa.Boolean(), nullable=True))
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('retriever', 'tuning')
# ### end Alembic commands ###

View File

@@ -0,0 +1,56 @@
"""Add Retriever Model
Revision ID: 3717364e6429
Revises: 7b7b566e667f
Create Date: 2024-10-21 14:22:30.258679
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '3717364e6429'
down_revision = '7b7b566e667f'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('retriever',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(length=50), nullable=False),
sa.Column('description', sa.Text(), nullable=True),
sa.Column('catalog_id', sa.Integer(), nullable=True),
sa.Column('type', sa.String(length=50), nullable=False),
sa.Column('user_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column('system_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column('configuration', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
sa.Column('created_by', sa.Integer(), nullable=True),
sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
sa.Column('updated_by', sa.Integer(), nullable=True),
sa.ForeignKeyConstraint(['catalog_id'], ['catalog.id'], ),
sa.ForeignKeyConstraint(['created_by'], ['public.user.id'], ),
sa.ForeignKeyConstraint(['updated_by'], ['public.user.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.drop_column('catalog', 'es_similarity_threshold')
op.drop_column('catalog', 'es_k')
op.drop_column('catalog', 'rag_tuning')
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('catalog', sa.Column('rag_tuning', sa.BOOLEAN(), autoincrement=False, nullable=True))
op.add_column('catalog', sa.Column('es_k', sa.INTEGER(), autoincrement=False, nullable=True))
op.add_column('catalog', sa.Column('es_similarity_threshold', sa.DOUBLE_PRECISION(precision=53), autoincrement=False, nullable=True))
op.drop_constraint(None, 'catalog', type_='foreignkey')
op.drop_constraint(None, 'catalog', type_='foreignkey')
op.create_foreign_key('catalog_updated_by_fkey', 'catalog', 'user', ['updated_by'], ['id'])
op.create_foreign_key('catalog_created_by_fkey', 'catalog', 'user', ['created_by'], ['id'])
op.drop_table('retriever')
# ### end Alembic commands ###

View File

@@ -0,0 +1,29 @@
"""Add catalog_properties to DocumentVersion
Revision ID: 6c5ca750e60c
Revises: b64d5cf32c7a
Create Date: 2024-10-29 08:33:54.663211
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '6c5ca750e60c'
down_revision = 'b64d5cf32c7a'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('document_version', sa.Column('catalog_properties', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('document_version', 'catalog_properties')
# ### end Alembic commands ###

View File

@@ -0,0 +1,46 @@
"""Extensions for more Catalog Types in Catalog Model
Revision ID: 7b7b566e667f
Revises: 28984b05d396
Create Date: 2024-10-21 07:39:52.260054
"""
from alembic import op
import sqlalchemy as sa
import pgvector
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '7b7b566e667f'
down_revision = '28984b05d396'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('catalog', sa.Column('parent_id', sa.Integer(), nullable=True))
op.add_column('catalog', sa.Column('type', sa.String(length=50), nullable=False, server_default='DEFAULT'))
# Update all existing rows to have the 'type' set to 'DEFAULT'
op.execute("UPDATE catalog SET type='DEFAULT' WHERE type IS NULL")
# Remove the server default (optional but recommended)
op.alter_column('catalog', 'type', server_default=None)
op.add_column('catalog', sa.Column('user_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
op.add_column('catalog', sa.Column('system_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
op.add_column('catalog', sa.Column('configuration', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
op.create_foreign_key(None, 'catalog', 'catalog', ['parent_id'], ['id'])
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint(None, 'catalog', type_='foreignkey')
op.drop_column('catalog', 'configuration')
op.drop_column('catalog', 'system_metadata')
op.drop_column('catalog', 'user_metadata')
op.drop_column('catalog', 'type')
op.drop_column('catalog', 'parent_id')
# ### end Alembic commands ###

View File

@@ -0,0 +1,33 @@
"""Removing Parent Catalog ID from Catalog ==> use tags in user_metadata instead
Revision ID: b64d5cf32c7a
Revises: 3717364e6429
Create Date: 2024-10-28 07:48:04.624298
"""
from alembic import op
import sqlalchemy as sa
import pgvector
# revision identifiers, used by Alembic.
revision = 'b64d5cf32c7a'
down_revision = '3717364e6429'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_constraint('catalog_parent_id_fkey', 'catalog', type_='foreignkey')
op.drop_column('catalog', 'parent_id')
op.drop_constraint('chat_session_user_id_fkey', 'chat_session', type_='foreignkey')
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('catalog', sa.Column('parent_id', sa.INTEGER(), autoincrement=False, nullable=True))
op.create_foreign_key('catalog_parent_id_fkey', 'catalog', 'catalog', ['parent_id'], ['id'])
# ### end Alembic commands ###

Some files were not shown because too many files have changed in this diff.