- Add Catalog Functionality
This commit is contained in:
20
CHANGELOG.md
20
CHANGELOG.md
@@ -25,6 +25,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
### Security
|
### Security
|
||||||
- In case of vulnerabilities.
|
- In case of vulnerabilities.
|
||||||
|
|
||||||
|
## [1.0.12-alfa]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Added Catalog functionality
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- For changes in existing functionality.
|
||||||
|
|
||||||
|
### Deprecated
|
||||||
|
- For soon-to-be removed features.
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
- For now removed features.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- Set default language when registering Documents or URLs.
|
||||||
|
|
||||||
|
### Security
|
||||||
|
- In case of vulnerabilities.
|
||||||
|
|
||||||
## [1.0.11-alfa]
|
## [1.0.11-alfa]
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|||||||
@@ -21,17 +21,17 @@ class Catalog(db.Model):
|
|||||||
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
|
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
|
||||||
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
|
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
|
||||||
|
|
||||||
# Embedding search variables
|
# Embedding search variables ==> move to specialist?
|
||||||
es_k = db.Column(db.Integer, nullable=True, default=5)
|
es_k = db.Column(db.Integer, nullable=True, default=8)
|
||||||
es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7)
|
es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.4)
|
||||||
|
|
||||||
# Chat variables
|
# Chat variables ==> Move to Specialist?
|
||||||
chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
|
chat_RAG_temperature = db.Column(db.Float, nullable=True, default=0.3)
|
||||||
chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
|
chat_no_RAG_temperature = db.Column(db.Float, nullable=True, default=0.5)
|
||||||
|
|
||||||
# Tuning enablers
|
# Tuning enablers
|
||||||
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
|
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
|
||||||
rag_tuning = db.Column(db.Boolean, nullable=True, default=False)
|
rag_tuning = db.Column(db.Boolean, nullable=True, default=False) # Move to Specialist?
|
||||||
|
|
||||||
# Versioning Information
|
# Versioning Information
|
||||||
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
|
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
|
||||||
@@ -42,6 +42,7 @@ class Catalog(db.Model):
|
|||||||
|
|
||||||
class Document(db.Model):
|
class Document(db.Model):
|
||||||
id = db.Column(db.Integer, primary_key=True)
|
id = db.Column(db.Integer, primary_key=True)
|
||||||
|
# tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
|
||||||
catalog_id = db.Column(db.Integer, db.ForeignKey(Catalog.id), nullable=True)
|
catalog_id = db.Column(db.Integer, db.ForeignKey(Catalog.id), nullable=True)
|
||||||
name = db.Column(db.String(100), nullable=False)
|
name = db.Column(db.String(100), nullable=False)
|
||||||
valid_from = db.Column(db.DateTime, nullable=True)
|
valid_from = db.Column(db.DateTime, nullable=True)
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ def create_document_stack(api_input, file, filename, extension, tenant_id):
|
|||||||
db.session.add(new_doc)
|
db.session.add(new_doc)
|
||||||
|
|
||||||
# Create the DocumentVersion
|
# Create the DocumentVersion
|
||||||
new_doc_vers = create_version_for_document(new_doc,
|
new_doc_vers = create_version_for_document(new_doc, tenant_id,
|
||||||
api_input.get('url', ''),
|
api_input.get('url', ''),
|
||||||
api_input.get('language', 'en'),
|
api_input.get('language', 'en'),
|
||||||
api_input.get('user_context', ''),
|
api_input.get('user_context', ''),
|
||||||
@@ -63,7 +63,7 @@ def create_document(form, filename, catalog_id):
|
|||||||
return new_doc
|
return new_doc
|
||||||
|
|
||||||
|
|
||||||
def create_version_for_document(document, url, language, user_context, user_metadata):
|
def create_version_for_document(document, tenant_id, url, language, user_context, user_metadata):
|
||||||
new_doc_vers = DocumentVersion()
|
new_doc_vers = DocumentVersion()
|
||||||
if url != '':
|
if url != '':
|
||||||
new_doc_vers.url = url
|
new_doc_vers.url = url
|
||||||
@@ -83,7 +83,7 @@ def create_version_for_document(document, url, language, user_context, user_meta
|
|||||||
|
|
||||||
set_logging_information(new_doc_vers, dt.now(tz.utc))
|
set_logging_information(new_doc_vers, dt.now(tz.utc))
|
||||||
|
|
||||||
mark_tenant_storage_dirty(document.tenant_id)
|
mark_tenant_storage_dirty(tenant_id)
|
||||||
|
|
||||||
return new_doc_vers
|
return new_doc_vers
|
||||||
|
|
||||||
@@ -287,7 +287,7 @@ def edit_document_version(version_id, user_context):
|
|||||||
return None, str(e)
|
return None, str(e)
|
||||||
|
|
||||||
|
|
||||||
def refresh_document_with_info(doc_id, api_input):
|
def refresh_document_with_info(doc_id, tenant_id, api_input):
|
||||||
doc = Document.query.get_or_404(doc_id)
|
doc = Document.query.get_or_404(doc_id)
|
||||||
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
|
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
|
||||||
|
|
||||||
@@ -295,11 +295,11 @@ def refresh_document_with_info(doc_id, api_input):
|
|||||||
return None, "This document has no URL. Only documents with a URL can be refreshed."
|
return None, "This document has no URL. Only documents with a URL can be refreshed."
|
||||||
|
|
||||||
new_doc_vers = create_version_for_document(
|
new_doc_vers = create_version_for_document(
|
||||||
doc,
|
doc, tenant_id,
|
||||||
old_doc_vers.url,
|
old_doc_vers.url,
|
||||||
api_input.get('language', old_doc_vers.language),
|
api_input.get('language', old_doc_vers.language),
|
||||||
api_input.get('user_context', old_doc_vers.user_context),
|
api_input.get('user_context', old_doc_vers.user_context),
|
||||||
api_input.get('user_metadata', old_doc_vers.user_metadata)
|
api_input.get('user_metadata', old_doc_vers.user_metadata),
|
||||||
)
|
)
|
||||||
|
|
||||||
set_logging_information(new_doc_vers, dt.now(tz.utc))
|
set_logging_information(new_doc_vers, dt.now(tz.utc))
|
||||||
@@ -329,7 +329,7 @@ def refresh_document_with_info(doc_id, api_input):
|
|||||||
|
|
||||||
|
|
||||||
# Update the existing refresh_document function to use the new refresh_document_with_info
|
# Update the existing refresh_document function to use the new refresh_document_with_info
|
||||||
def refresh_document(doc_id):
|
def refresh_document(doc_id, tenant_id):
|
||||||
current_app.logger.info(f'Refreshing document {doc_id}')
|
current_app.logger.info(f'Refreshing document {doc_id}')
|
||||||
doc = Document.query.get_or_404(doc_id)
|
doc = Document.query.get_or_404(doc_id)
|
||||||
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
|
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
|
||||||
@@ -340,7 +340,7 @@ def refresh_document(doc_id):
|
|||||||
'user_metadata': old_doc_vers.user_metadata
|
'user_metadata': old_doc_vers.user_metadata
|
||||||
}
|
}
|
||||||
|
|
||||||
return refresh_document_with_info(doc_id, api_input)
|
return refresh_document_with_info(doc_id, tenant_id, api_input)
|
||||||
|
|
||||||
|
|
||||||
# Function triggered when a document_version is created or updated
|
# Function triggered when a document_version is created or updated
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from portkey_ai.langchain.portkey_langchain_callback_handler import LangchainCal
|
|||||||
from common.langchain.llm_metrics_handler import LLMMetricsHandler
|
from common.langchain.llm_metrics_handler import LLMMetricsHandler
|
||||||
from common.langchain.tracked_openai_embeddings import TrackedOpenAIEmbeddings
|
from common.langchain.tracked_openai_embeddings import TrackedOpenAIEmbeddings
|
||||||
from common.langchain.tracked_transcribe import tracked_transcribe
|
from common.langchain.tracked_transcribe import tracked_transcribe
|
||||||
from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI
|
from common.models.document import EmbeddingSmallOpenAI, EmbeddingLargeOpenAI, Catalog
|
||||||
from common.models.user import Tenant
|
from common.models.user import Tenant
|
||||||
from config.model_config import MODEL_CONFIG
|
from config.model_config import MODEL_CONFIG
|
||||||
from common.utils.business_event_context import current_event
|
from common.utils.business_event_context import current_event
|
||||||
@@ -42,8 +42,9 @@ def set_language_prompt_template(cls, language_prompt):
|
|||||||
|
|
||||||
|
|
||||||
class ModelVariables(MutableMapping):
|
class ModelVariables(MutableMapping):
|
||||||
def __init__(self, tenant: Tenant):
|
def __init__(self, tenant: Tenant, catalog_id=None):
|
||||||
self.tenant = tenant
|
self.tenant = tenant
|
||||||
|
self.catalog_id = catalog_id
|
||||||
self._variables = self._initialize_variables()
|
self._variables = self._initialize_variables()
|
||||||
self._embedding_model = None
|
self._embedding_model = None
|
||||||
self._llm = None
|
self._llm = None
|
||||||
@@ -57,25 +58,32 @@ class ModelVariables(MutableMapping):
|
|||||||
def _initialize_variables(self):
|
def _initialize_variables(self):
|
||||||
variables = {}
|
variables = {}
|
||||||
|
|
||||||
# We initialize the variables that are available knowing the tenant. For the other, we will apply 'lazy loading'
|
# Get the Catalog if catalog_id is passed
|
||||||
variables['k'] = self.tenant.es_k or 5
|
if self.catalog_id:
|
||||||
variables['similarity_threshold'] = self.tenant.es_similarity_threshold or 0.7
|
catalog = Catalog.query.get_or_404(self.catalog_id)
|
||||||
variables['RAG_temperature'] = self.tenant.chat_RAG_temperature or 0.3
|
|
||||||
variables['no_RAG_temperature'] = self.tenant.chat_no_RAG_temperature or 0.5
|
# We initialize the variables that are available knowing the tenant and the catalog.
|
||||||
variables['embed_tuning'] = self.tenant.embed_tuning or False
|
variables['embed_tuning'] = catalog.embed_tuning or False
|
||||||
variables['rag_tuning'] = self.tenant.rag_tuning or False
|
|
||||||
variables['rag_context'] = self.tenant.rag_context or " "
|
|
||||||
|
|
||||||
# Set HTML Chunking Variables
|
# Set HTML Chunking Variables
|
||||||
variables['html_tags'] = self.tenant.html_tags
|
variables['html_tags'] = catalog.html_tags
|
||||||
variables['html_end_tags'] = self.tenant.html_end_tags
|
variables['html_end_tags'] = catalog.html_end_tags
|
||||||
variables['html_included_elements'] = self.tenant.html_included_elements
|
variables['html_included_elements'] = catalog.html_included_elements
|
||||||
variables['html_excluded_elements'] = self.tenant.html_excluded_elements
|
variables['html_excluded_elements'] = catalog.html_excluded_elements
|
||||||
variables['html_excluded_classes'] = self.tenant.html_excluded_classes
|
variables['html_excluded_classes'] = catalog.html_excluded_classes
|
||||||
|
|
||||||
# Set Chunk Size variables
|
# Set Chunk Size variables
|
||||||
variables['min_chunk_size'] = self.tenant.min_chunk_size
|
variables['min_chunk_size'] = catalog.min_chunk_size
|
||||||
variables['max_chunk_size'] = self.tenant.max_chunk_size
|
variables['max_chunk_size'] = catalog.max_chunk_size
|
||||||
|
|
||||||
|
# Set the RAG Context (will have to change once specialists are defined)
|
||||||
|
variables['rag_context'] = self.tenant.rag_context or " "
|
||||||
|
# Temporary setting until we have Specialists
|
||||||
|
variables['rag_tuning'] = False
|
||||||
|
variables['RAG_temperature'] = 0.3
|
||||||
|
variables['no_RAG_temperature'] = 0.5
|
||||||
|
variables['k'] = 8
|
||||||
|
variables['similarity_threshold'] = 0.4
|
||||||
|
|
||||||
# Set model providers
|
# Set model providers
|
||||||
variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1)
|
variables['embedding_provider'], variables['embedding_model'] = self.tenant.embedding_model.rsplit('.', 1)
|
||||||
@@ -195,7 +203,12 @@ class ModelVariables(MutableMapping):
|
|||||||
return self.transcription_client
|
return self.transcription_client
|
||||||
elif key in self._variables.get('prompt_templates', []):
|
elif key in self._variables.get('prompt_templates', []):
|
||||||
return self.get_prompt_template(key)
|
return self.get_prompt_template(key)
|
||||||
return self._variables.get(key)
|
else:
|
||||||
|
value = self._variables.get(key)
|
||||||
|
if value is not None:
|
||||||
|
return value
|
||||||
|
else:
|
||||||
|
raise KeyError(f'Variable {key} does not exist in ModelVariables')
|
||||||
|
|
||||||
def __setitem__(self, key: str, value: Any) -> None:
|
def __setitem__(self, key: str, value: Any) -> None:
|
||||||
self._variables[key] = value
|
self._variables[key] = value
|
||||||
@@ -225,8 +238,8 @@ class ModelVariables(MutableMapping):
|
|||||||
return self._variables.values()
|
return self._variables.values()
|
||||||
|
|
||||||
|
|
||||||
def select_model_variables(tenant):
|
def select_model_variables(tenant, catalog_id=None):
|
||||||
model_variables = ModelVariables(tenant=tenant)
|
model_variables = ModelVariables(tenant=tenant, catalog_id=catalog_id)
|
||||||
return model_variables
|
return model_variables
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -217,7 +217,8 @@ class DocumentResource(Resource):
|
|||||||
@document_ns.response(200, 'Document refreshed successfully')
|
@document_ns.response(200, 'Document refreshed successfully')
|
||||||
def post(self, document_id):
|
def post(self, document_id):
|
||||||
"""Refresh a document"""
|
"""Refresh a document"""
|
||||||
new_version, result = refresh_document(document_id)
|
tenant_id = get_jwt_identity()
|
||||||
|
new_version, result = refresh_document(document_id, tenant_id)
|
||||||
if new_version:
|
if new_version:
|
||||||
return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200
|
return {'message': f'Document refreshed. New version: {new_version.id}. Task ID: {result}'}, 200
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
{{ render_selectable_table(headers=["Catalog ID", "Name"], rows=rows, selectable=True, id="catalogsTable") }}
|
{{ render_selectable_table(headers=["Catalog ID", "Name"], rows=rows, selectable=True, id="catalogsTable") }}
|
||||||
<div class="form-group mt-3">
|
<div class="form-group mt-3">
|
||||||
<button type="submit" name="action" value="set_session_catalog" class="btn btn-primary">Set Session Catalog</button>
|
<button type="submit" name="action" value="set_session_catalog" class="btn btn-primary">Set Session Catalog</button>
|
||||||
|
<button type="submit" name="action" value="edit_catalog" class="btn btn-primary">Edit Catalog</button>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -81,6 +81,8 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
{% if current_user.is_authenticated %}
|
{% if current_user.is_authenticated %}
|
||||||
{{ dropdown('Document Mgmt', 'note_stack', [
|
{{ dropdown('Document Mgmt', 'note_stack', [
|
||||||
|
{'name': 'Add Catalog', 'url': '/document/catalog', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
|
{'name': 'All Catalogs', 'url': '/document/catalogs', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
{'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
@@ -114,6 +116,17 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</ul>
|
</ul>
|
||||||
{% if current_user.is_authenticated %}
|
{% if current_user.is_authenticated %}
|
||||||
|
<ul class="navbar-nav d-lg-block d-none">
|
||||||
|
<li class="nav-item">
|
||||||
|
<a href="/document/catalogs" class="btn btn-sm bg-gradient-primary mb-0 me-2">
|
||||||
|
{% if 'catalog_name' in session %}
|
||||||
|
CATALOG: {{ session['catalog_name'] }}
|
||||||
|
{% else %}
|
||||||
|
CHOOSE CATALOG
|
||||||
|
{% endif %}
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
<ul class="navbar-nav d-lg-block d-none">
|
<ul class="navbar-nav d-lg-block d-none">
|
||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
<a href="/session_defaults" class="btn btn-sm bg-gradient-primary mb-0">
|
<a href="/session_defaults" class="btn btn-sm bg-gradient-primary mb-0">
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
<form method="post">
|
<form method="post">
|
||||||
{{ form.hidden_tag() }}
|
{{ form.hidden_tag() }}
|
||||||
<!-- Main Tenant Information -->
|
<!-- Main Tenant Information -->
|
||||||
{% set main_fields = ['name', 'website', 'default_language', 'allowed_languages', 'rag_context', 'type'] %}
|
{% set main_fields = ['name', 'website', 'default_language', 'allowed_languages', 'timezone','rag_context', 'type'] %}
|
||||||
{% for field in form %}
|
{% for field in form %}
|
||||||
{{ render_included_field(field, disabled_fields=[], include_fields=main_fields) }}
|
{{ render_included_field(field, disabled_fields=[], include_fields=main_fields) }}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
@@ -30,21 +30,6 @@
|
|||||||
License Information
|
License Information
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
<li class="nav-item">
|
|
||||||
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#chunking-tab" role="tab" aria-controls="chunking" aria-selected="false">
|
|
||||||
Chunking
|
|
||||||
</a>
|
|
||||||
</li>
|
|
||||||
<li class="nav-item">
|
|
||||||
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#embedding-search-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
|
|
||||||
Embedding Search
|
|
||||||
</a>
|
|
||||||
</li>
|
|
||||||
<li class="nav-item">
|
|
||||||
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#tuning-tab" role="tab" aria-controls="html-chunking" aria-selected="false">
|
|
||||||
Tuning
|
|
||||||
</a>
|
|
||||||
</li>
|
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
<div class="tab-content tab-space">
|
<div class="tab-content tab-space">
|
||||||
@@ -78,27 +63,6 @@
|
|||||||
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
|
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<!-- Chunking Settings Tab -->
|
|
||||||
<div class="tab-pane fade" id="chunking-tab" role="tabpanel">
|
|
||||||
{% set html_fields = ['html_tags', 'html_end_tags', 'html_included_elements', 'html_excluded_elements', 'html_excluded_classes', 'min_chunk_size', 'max_chunk_size'] %}
|
|
||||||
{% for field in form %}
|
|
||||||
{{ render_included_field(field, disabled_fields=[], include_fields=html_fields) }}
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
<!-- Embedding Search Settings Tab -->
|
|
||||||
<div class="tab-pane fade" id="embedding-search-tab" role="tabpanel">
|
|
||||||
{% set es_fields = ['es_k', 'es_similarity_threshold', ] %}
|
|
||||||
{% for field in form %}
|
|
||||||
{{ render_included_field(field, disabled_fields=[], include_fields=es_fields) }}
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
<!-- Tuning Settings Tab -->
|
|
||||||
<div class="tab-pane fade" id="tuning-tab" role="tabpanel">
|
|
||||||
{% set tuning_fields = ['embed_tuning', 'rag_tuning', ] %}
|
|
||||||
{% for field in form %}
|
|
||||||
{{ render_included_field(field, disabled_fields=[], include_fields=tuning_fields) }}
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -63,6 +63,16 @@ def catalog():
|
|||||||
tenant_id = session.get('tenant').get('id')
|
tenant_id = session.get('tenant').get('id')
|
||||||
new_catalog = Catalog()
|
new_catalog = Catalog()
|
||||||
form.populate_obj(new_catalog)
|
form.populate_obj(new_catalog)
|
||||||
|
# Convert the comma-separated HTML chunking form fields into lists of stripped values
|
||||||
|
new_catalog.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
|
||||||
|
new_catalog.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
|
||||||
|
if form.html_end_tags.data else []
|
||||||
|
new_catalog.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
|
||||||
|
if form.html_included_elements.data else []
|
||||||
|
new_catalog.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
|
||||||
|
if form.html_excluded_elements.data else []
|
||||||
|
new_catalog.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \
|
||||||
|
if form.html_excluded_classes.data else []
|
||||||
set_logging_information(new_catalog, dt.now(tz.utc))
|
set_logging_information(new_catalog, dt.now(tz.utc))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -75,13 +85,11 @@ def catalog():
|
|||||||
flash(f'Failed to add catalog. Error: {e}', 'danger')
|
flash(f'Failed to add catalog. Error: {e}', 'danger')
|
||||||
current_app.logger.error(f'Failed to add catalog {new_catalog.name}'
|
current_app.logger.error(f'Failed to add catalog {new_catalog.name}'
|
||||||
f'for tenant {tenant_id}. Error: {str(e)}')
|
f'for tenant {tenant_id}. Error: {str(e)}')
|
||||||
else:
|
|
||||||
flash('Please fill in all required fields.', 'information')
|
|
||||||
|
|
||||||
return render_template('document/catalog.html')
|
return render_template('document/catalog.html', form=form)
|
||||||
|
|
||||||
|
|
||||||
@document_bp.route('/catalogs', methods=['POST'])
|
@document_bp.route('/catalogs', methods=['GET', 'POST'])
|
||||||
@roles_accepted('Super User', 'Tenant Admin')
|
@roles_accepted('Super User', 'Tenant Admin')
|
||||||
def catalogs():
|
def catalogs():
|
||||||
page = request.args.get('page', 1, type=int)
|
page = request.args.get('page', 1, type=int)
|
||||||
@@ -108,8 +116,10 @@ def handle_catalog_selection():
|
|||||||
catalog = Catalog.query.get_or_404(catalog_id)
|
catalog = Catalog.query.get_or_404(catalog_id)
|
||||||
|
|
||||||
if action == 'set_session_catalog':
|
if action == 'set_session_catalog':
|
||||||
|
current_app.logger.info(f'Setting session catalog to {catalog.name}')
|
||||||
session['catalog_id'] = catalog_id
|
session['catalog_id'] = catalog_id
|
||||||
session['catalog_name'] = catalog.name
|
session['catalog_name'] = catalog.name
|
||||||
|
current_app.logger.info(f'Finished setting session catalog to {catalog.name}')
|
||||||
elif action == 'edit_catalog':
|
elif action == 'edit_catalog':
|
||||||
return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))
|
return redirect(prefixed_url_for('document_bp.edit_catalog', catalog_id=catalog_id))
|
||||||
|
|
||||||
@@ -125,6 +135,16 @@ def edit_catalog(catalog_id):
|
|||||||
|
|
||||||
if request.method == 'POST' and form.validate_on_submit():
|
if request.method == 'POST' and form.validate_on_submit():
|
||||||
form.populate_obj(catalog)
|
form.populate_obj(catalog)
|
||||||
|
# Convert the comma-separated HTML chunking form fields into lists of stripped values
|
||||||
|
catalog.html_tags = [tag.strip() for tag in form.html_tags.data.split(',')] if form.html_tags.data else []
|
||||||
|
catalog.html_end_tags = [tag.strip() for tag in form.html_end_tags.data.split(',')] \
|
||||||
|
if form.html_end_tags.data else []
|
||||||
|
catalog.html_included_elements = [tag.strip() for tag in form.html_included_elements.data.split(',')] \
|
||||||
|
if form.html_included_elements.data else []
|
||||||
|
catalog.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
|
||||||
|
if form.html_excluded_elements.data else []
|
||||||
|
catalog.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \
|
||||||
|
if form.html_excluded_classes.data else []
|
||||||
update_logging_information(catalog, dt.now(tz.utc))
|
update_logging_information(catalog, dt.now(tz.utc))
|
||||||
try:
|
try:
|
||||||
db.session.add(catalog)
|
db.session.add(catalog)
|
||||||
|
|||||||
@@ -266,10 +266,16 @@ def handle_tenant_selection():
|
|||||||
tenant_identification = request.form['selected_row']
|
tenant_identification = request.form['selected_row']
|
||||||
tenant_id = ast.literal_eval(tenant_identification).get('value')
|
tenant_id = ast.literal_eval(tenant_identification).get('value')
|
||||||
the_tenant = Tenant.query.get(tenant_id)
|
the_tenant = Tenant.query.get(tenant_id)
|
||||||
|
|
||||||
|
# set tenant information in the session
|
||||||
session['tenant'] = the_tenant.to_dict()
|
session['tenant'] = the_tenant.to_dict()
|
||||||
session['default_language'] = the_tenant.default_language
|
session['default_language'] = the_tenant.default_language
|
||||||
session['embedding_model'] = the_tenant.embedding_model
|
session['embedding_model'] = the_tenant.embedding_model
|
||||||
session['llm_model'] = the_tenant.llm_model
|
session['llm_model'] = the_tenant.llm_model
|
||||||
|
# remove catalog-related items from the session
|
||||||
|
session.pop('catalog_id', None)
|
||||||
|
session.pop('catalog_name', None)
|
||||||
|
|
||||||
action = request.form['action']
|
action = request.form['action']
|
||||||
|
|
||||||
match action:
|
match action:
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ def ping():
|
|||||||
|
|
||||||
def detail_question(question, language, model_variables, session_id):
|
def detail_question(question, language, model_variables, session_id):
|
||||||
current_app.logger.debug(f'Detail question: {question}')
|
current_app.logger.debug(f'Detail question: {question}')
|
||||||
current_app.logger.debug(f'model_varialbes: {model_variables}')
|
current_app.logger.debug(f'model_variables: {model_variables}')
|
||||||
current_app.logger.debug(f'session_id: {session_id}')
|
current_app.logger.debug(f'session_id: {session_id}')
|
||||||
retriever = EveAIHistoryRetriever(model_variables=model_variables, session_id=session_id)
|
retriever = EveAIHistoryRetriever(model_variables=model_variables, session_id=session_id)
|
||||||
llm = model_variables['llm']
|
llm = model_variables['llm']
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from langchain_core.runnables import RunnablePassthrough
|
|||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
|
||||||
from common.extensions import db, minio_client
|
from common.extensions import db, minio_client
|
||||||
from common.models.document import DocumentVersion, Embedding
|
from common.models.document import DocumentVersion, Embedding, Document
|
||||||
from common.models.user import Tenant
|
from common.models.user import Tenant
|
||||||
from common.utils.celery_utils import current_celery
|
from common.utils.celery_utils import current_celery
|
||||||
from common.utils.database import Database
|
from common.utils.database import Database
|
||||||
@@ -50,8 +50,12 @@ def create_embeddings(tenant_id, document_version_id):
|
|||||||
if document_version is None:
|
if document_version is None:
|
||||||
raise Exception(f'Document version {document_version_id} not found')
|
raise Exception(f'Document version {document_version_id} not found')
|
||||||
|
|
||||||
|
# Retrieve the Catalog ID
|
||||||
|
doc = Document.query.get_or_404(document_version.doc_id)
|
||||||
|
catalog_id = doc.catalog_id
|
||||||
|
|
||||||
# Select variables to work with depending on tenant and model
|
# Select variables to work with depending on tenant and model
|
||||||
model_variables = select_model_variables(tenant)
|
model_variables = select_model_variables(tenant, catalog_id=catalog_id)
|
||||||
current_app.logger.debug(f'Model variables: {model_variables}')
|
current_app.logger.debug(f'Model variables: {model_variables}')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
from logging.config import fileConfig
|
from logging.config import fileConfig
|
||||||
|
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
@@ -19,8 +21,26 @@ config = context.config
|
|||||||
|
|
||||||
# Interpret the config file for Python logging.
|
# Interpret the config file for Python logging.
|
||||||
# This line sets up loggers basically.
|
# This line sets up loggers basically.
|
||||||
fileConfig(config.config_file_name)
|
# fileConfig(config.config_file_name)
|
||||||
logger = logging.getLogger('alembic.env')
|
# logger = logging.getLogger('alembic.env')
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
stream=sys.stdout,
|
||||||
|
level=logging.INFO,
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
# Reset handlers to avoid issues with Alembic overriding them
|
||||||
|
for handler in logger.handlers:
|
||||||
|
logger.removeHandler(handler)
|
||||||
|
|
||||||
|
# Add a stream handler to output logs to the console
|
||||||
|
console_handler = logging.StreamHandler(sys.stdout)
|
||||||
|
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
|
||||||
|
logger.addHandler(console_handler)
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
def get_engine():
|
def get_engine():
|
||||||
@@ -125,6 +145,7 @@ def run_migrations_online():
|
|||||||
tenants = get_tenant_ids()
|
tenants = get_tenant_ids()
|
||||||
for tenant in tenants:
|
for tenant in tenants:
|
||||||
try:
|
try:
|
||||||
|
os.environ['TENANT_ID'] = str(tenant)
|
||||||
logger.info(f"Migrating tenant: {tenant}")
|
logger.info(f"Migrating tenant: {tenant}")
|
||||||
# set search path on the connection, which ensures that
|
# set search path on the connection, which ensures that
|
||||||
# PostgreSQL will emit all CREATE / ALTER / DROP statements
|
# PostgreSQL will emit all CREATE / ALTER / DROP statements
|
||||||
|
|||||||
@@ -0,0 +1,56 @@
|
|||||||
|
"""Adding Catalogs to Document Domain
|
||||||
|
|
||||||
|
Revision ID: 0f5932ff3051
|
||||||
|
Revises: 5a75fb6da7b8
|
||||||
|
Create Date: 2024-10-14 15:20:45.058329
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
import pgvector
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '0f5932ff3051'
|
||||||
|
down_revision = '5a75fb6da7b8'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Create the ``catalog`` table and add ``document.catalog_id`` pointing at it.

    The table holds a name/description, HTML-processing lists, chunk sizes,
    embedding-search and chat parameters, tuning flags and audit columns
    (``created_*`` / ``updated_*``), the latter referencing ``public.user.id``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # String-array columns, as (column name, max length of each item).
    html_array_columns = [
        sa.Column(column_name, postgresql.ARRAY(sa.String(length=item_length)), nullable=True)
        for column_name, item_length in (
            ('html_tags', 10),
            ('html_end_tags', 10),
            ('html_included_elements', 50),
            ('html_excluded_elements', 50),
            ('html_excluded_classes', 200),
        )
    ]
    # Optional numeric / boolean settings, in table order.
    settings_columns = [
        sa.Column('min_chunk_size', sa.Integer(), nullable=True),
        sa.Column('max_chunk_size', sa.Integer(), nullable=True),
        sa.Column('es_k', sa.Integer(), nullable=True),
        sa.Column('es_similarity_threshold', sa.Float(), nullable=True),
        sa.Column('chat_RAG_temperature', sa.Float(), nullable=True),
        sa.Column('chat_no_RAG_temperature', sa.Float(), nullable=True),
        sa.Column('embed_tuning', sa.Boolean(), nullable=True),
        sa.Column('rag_tuning', sa.Boolean(), nullable=True),
    ]
    # Audit columns; both timestamps default to now() on the server side.
    audit_columns = [
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
        sa.Column('created_by', sa.Integer(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=False),
        sa.Column('updated_by', sa.Integer(), nullable=True),
    ]

    op.create_table(
        'catalog',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=50), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        *html_array_columns,
        *settings_columns,
        *audit_columns,
        sa.ForeignKeyConstraint(['created_by'], ['public.user.id']),
        sa.ForeignKeyConstraint(['updated_by'], ['public.user.id']),
        sa.PrimaryKeyConstraint('id'),
    )

    # Nullable link from each document to its catalog; no explicit
    # constraint name is supplied for the foreign key.
    catalog_id_column = sa.Column('catalog_id', sa.Integer(), nullable=True)
    op.add_column('document', catalog_id_column)
    op.create_foreign_key(None, 'document', 'catalog', ['catalog_id'], ['id'])
    # ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Drop ``document.catalog_id`` and then the ``catalog`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    # The referencing column is removed before the table it points at.
    op.drop_column(table_name='document', column_name='catalog_id')
    op.drop_table(table_name='catalog')
    # ### end Alembic commands ###
|
||||||
@@ -0,0 +1,99 @@
|
|||||||
|
"""Upgrading Existing documents to default catalog
|
||||||
|
|
||||||
|
Revision ID: 28984b05d396
|
||||||
|
Revises: 0f5932ff3051
|
||||||
|
Create Date: 2024-10-15 09:02:23.355660
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
import pgvector
|
||||||
|
from common.models.document import Catalog, Document
|
||||||
|
from common.models.user import Tenant
|
||||||
|
from flask import current_app
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Module-level logging for this revision script.
# basicConfig() only configures the root logger if it has no handlers yet.
logging.basicConfig(level=logging.INFO)

# Timestamped format used by this script's own console handler.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Console handler (StreamHandler without arguments writes to sys.stderr),
# passing INFO and above.
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
console_handler.setLevel(logging.INFO)

# Logger named after this module, with the console handler attached.
logger = logging.getLogger(__name__)
logger.addHandler(console_handler)

# Runs at import time, i.e. before upgrade() is called.
logger.info("Starting revision upgrade 28984b05d396")
|
||||||
|
|
||||||
|
# Alembic revision identifiers: this revision and the one it revises.
revision = "28984b05d396"
down_revision = "0f5932ff3051"
# This revision declares no branch labels and no dependencies.
branch_labels = depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Create a "Default Catalog" and attach every existing document to it.

    The tenant is taken from the ``TENANT_ID`` environment variable. The new
    catalog copies its HTML-processing, chunking, embedding-search, chat and
    tuning settings from that tenant's record; afterwards every ``Document``
    row gets ``catalog_id`` set to the new catalog's id.

    Raises:
        RuntimeError: if ``TENANT_ID`` is not set.
        Exception: any other failure is logged with its traceback, the
            session is rolled back and the error is re-raised.
    """
    logger.info("Starting migration: Creating default catalog and updating documents.")

    # Attributes copied one-to-one from the tenant onto its default catalog.
    copied_settings = (
        'html_tags',
        'html_end_tags',
        'html_included_elements',
        'html_excluded_elements',
        'html_excluded_classes',
        'min_chunk_size',
        'max_chunk_size',
        'es_k',
        'es_similarity_threshold',
        'chat_RAG_temperature',
        'chat_no_RAG_temperature',
        'embed_tuning',
        'rag_tuning',
    )

    bind = op.get_bind()
    session = Session(bind=bind)

    try:
        # Fail with an explicit message instead of the opaque TypeError that
        # int(None) would raise when the variable is missing.
        raw_tenant_id = os.getenv('TENANT_ID')
        if raw_tenant_id is None:
            raise RuntimeError("TENANT_ID environment variable must be set to run this migration.")
        tenant_id = int(raw_tenant_id)
        logger.info(f"In migration: tenant_id = {tenant_id}")

        # Step 1: Create a new Default Catalog
        logger.info("Creating default catalog")
        default_catalog = Catalog(
            name='Default Catalog',
            description=None,
        )
        # NOTE(review): this lookup goes through the model's ``query``
        # attribute rather than the migration session above — confirm that
        # is intended.
        tenant = Tenant.query.get_or_404(tenant_id)
        for setting in copied_settings:
            setattr(default_catalog, setting, getattr(tenant, setting))

        session.add(default_catalog)
        session.commit()

        new_catalog_id = default_catalog.id
        logger.info(f"Default catalog created with ID: {new_catalog_id}")

        # Step 2: Update all documents to use this new catalog.
        # A single bulk UPDATE avoids loading every Document row into memory
        # and issuing one UPDATE per row; it returns the matched row count.
        logger.info("Updating documents with the new catalog ID.")
        updated_count = session.query(Document).update(
            {Document.catalog_id: new_catalog_id},
            synchronize_session=False,
        )
        session.commit()

        logger.info(f"Updated {updated_count} documents with new catalog ID.")

    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception(f"An error occurred during migration: {e}")
        session.rollback()
        raise
    finally:
        session.close()

    logger.info("Migration completed successfully.")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """No-op: downgrading this revision changes nothing in the database."""
    return None
|
||||||
@@ -17,7 +17,7 @@
|
|||||||
document.addEventListener('DOMContentLoaded', function() {
|
document.addEventListener('DOMContentLoaded', function() {
|
||||||
const eveAI = new EveAI(
|
const eveAI = new EveAI(
|
||||||
'2',
|
'2',
|
||||||
'EveAI-CHAT-9079-8604-7441-6496-7604',
|
'EveAI-CHAT-6525-5692-6412-5828-7546',
|
||||||
'http://macstudio.ask-eve-ai-local.com',
|
'http://macstudio.ask-eve-ai-local.com',
|
||||||
'en',
|
'en',
|
||||||
'en,fr,nl',
|
'en,fr,nl',
|
||||||
|
|||||||
Reference in New Issue
Block a user