Compare commits
11 Commits
1.0.1-alfa
...
v1.0.6-alf
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1fa33c029b | ||
|
|
bcf7d439f3 | ||
|
|
b9acf4d2ae | ||
|
|
ae7bf3dbae | ||
|
|
914c265afe | ||
|
|
a158655247 | ||
|
|
bc350af247 | ||
|
|
6062b7646c | ||
|
|
122d1a18df | ||
|
|
2ca006d82c | ||
|
|
a9f9b04117 |
29
.gitignore
vendored
29
.gitignore
vendored
@@ -12,3 +12,32 @@ docker/tenant_files/
|
|||||||
**/.DS_Store
|
**/.DS_Store
|
||||||
__pycache__
|
__pycache__
|
||||||
**/__pycache__
|
**/__pycache__
|
||||||
|
/.idea
|
||||||
|
*.pyc
|
||||||
|
*.pyc
|
||||||
|
common/.DS_Store
|
||||||
|
common/__pycache__/__init__.cpython-312.pyc
|
||||||
|
common/__pycache__/extensions.cpython-312.pyc
|
||||||
|
common/models/__pycache__/__init__.cpython-312.pyc
|
||||||
|
common/models/__pycache__/document.cpython-312.pyc
|
||||||
|
common/models/__pycache__/interaction.cpython-312.pyc
|
||||||
|
common/models/__pycache__/user.cpython-312.pyc
|
||||||
|
common/utils/.DS_Store
|
||||||
|
common/utils/__pycache__/__init__.cpython-312.pyc
|
||||||
|
common/utils/__pycache__/celery_utils.cpython-312.pyc
|
||||||
|
common/utils/__pycache__/nginx_utils.cpython-312.pyc
|
||||||
|
common/utils/__pycache__/security.cpython-312.pyc
|
||||||
|
common/utils/__pycache__/simple_encryption.cpython-312.pyc
|
||||||
|
common/utils/__pycache__/template_filters.cpython-312.pyc
|
||||||
|
config/.DS_Store
|
||||||
|
config/__pycache__/__init__.cpython-312.pyc
|
||||||
|
config/__pycache__/config.cpython-312.pyc
|
||||||
|
config/__pycache__/logging_config.cpython-312.pyc
|
||||||
|
eveai_app/.DS_Store
|
||||||
|
eveai_app/__pycache__/__init__.cpython-312.pyc
|
||||||
|
eveai_app/__pycache__/errors.cpython-312.pyc
|
||||||
|
eveai_chat/.DS_Store
|
||||||
|
migrations/.DS_Store
|
||||||
|
migrations/public/.DS_Store
|
||||||
|
scripts/.DS_Store
|
||||||
|
scripts/__pycache__/run_eveai_app.cpython-312.pyc
|
||||||
|
|||||||
80
CHANGELOG.md
Normal file
80
CHANGELOG.md
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
# Changelog
|
||||||
|
|
||||||
|
All notable changes to EveAI will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- For new features.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- For changes in existing functionality.
|
||||||
|
|
||||||
|
### Deprecated
|
||||||
|
- For soon-to-be removed features.
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
- For now removed features.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- For any bug fixes.
|
||||||
|
|
||||||
|
### Security
|
||||||
|
- In case of vulnerabilities.
|
||||||
|
|
||||||
|
## [1.0.5-alfa] - 2024-09-02
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Allow chatwidget to connect to multiple servers (e.g. development and production)
|
||||||
|
- Start implementation of API
|
||||||
|
- Add API-key functionality to tenants
|
||||||
|
- Deduplication of API and Document view code
|
||||||
|
- Allow URL addition to accept all types of files, not just HTML
|
||||||
|
- Allow new file types upload: srt, mp3, ogg, mp4
|
||||||
|
- Improve processing of different file types using Processor classes
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
- Removed direct upload of YouTube URLs, due to continuous changes to the YouTube website
|
||||||
|
|
||||||
|
## [1.0.4-alfa] - 2024-08-27
|
||||||
|
Skipped
|
||||||
|
|
||||||
|
## [1.0.3-alfa] - 2024-08-27
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Refinement of HTML processing - allow for excluded classes and elements.
|
||||||
|
- Allow for multiple instances of Evie on 1 website (pure + WordPress plugin)
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- PDF Processing extracted in new PDF Processor class.
|
||||||
|
- Allow for longer and more complex PDFs to be uploaded.
|
||||||
|
|
||||||
|
## [1.0.2-alfa] - 2024-08-22
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- Bugfix for ResetPasswordForm in config.py
|
||||||
|
|
||||||
|
## [1.0.1-alfa] - 2024-08-21
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Full Document Version Overview
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- Improvements to user creation and registration, renewal of passwords, ...
|
||||||
|
|
||||||
|
## [1.0.0-alfa] - 2024-08-16
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Initial release of the project.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- None
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- None
|
||||||
|
|
||||||
|
[Unreleased]: https://github.com/username/repo/compare/v1.0.0...HEAD
|
||||||
|
[1.0.0]: https://github.com/username/repo/releases/tag/v1.0.0
|
||||||
@@ -9,6 +9,7 @@ from flask_socketio import SocketIO
|
|||||||
from flask_jwt_extended import JWTManager
|
from flask_jwt_extended import JWTManager
|
||||||
from flask_session import Session
|
from flask_session import Session
|
||||||
from flask_wtf import CSRFProtect
|
from flask_wtf import CSRFProtect
|
||||||
|
from flask_restful import Api
|
||||||
|
|
||||||
from .utils.nginx_utils import prefixed_url_for
|
from .utils.nginx_utils import prefixed_url_for
|
||||||
from .utils.simple_encryption import SimpleEncryption
|
from .utils.simple_encryption import SimpleEncryption
|
||||||
@@ -27,6 +28,7 @@ cors = CORS()
|
|||||||
socketio = SocketIO()
|
socketio = SocketIO()
|
||||||
jwt = JWTManager()
|
jwt = JWTManager()
|
||||||
session = Session()
|
session = Session()
|
||||||
|
api = Api()
|
||||||
|
|
||||||
# kms_client = JosKMSClient.from_service_account_json('config/gc_sa_eveai.json')
|
# kms_client = JosKMSClient.from_service_account_json('config/gc_sa_eveai.json')
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ class Document(db.Model):
|
|||||||
|
|
||||||
# Versioning Information
|
# Versioning Information
|
||||||
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
|
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
|
||||||
created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
|
created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
|
||||||
updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
|
updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
|
||||||
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
|
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
|
||||||
|
|
||||||
|
|||||||
@@ -35,10 +35,11 @@ class Tenant(db.Model):
|
|||||||
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
|
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
|
||||||
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
|
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
|
||||||
html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
|
html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
|
||||||
|
html_excluded_classes = db.Column(ARRAY(sa.String(200)), nullable=True)
|
||||||
|
|
||||||
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
|
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
|
||||||
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
|
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
|
||||||
|
|
||||||
|
|
||||||
# Embedding search variables
|
# Embedding search variables
|
||||||
es_k = db.Column(db.Integer, nullable=True, default=5)
|
es_k = db.Column(db.Integer, nullable=True, default=5)
|
||||||
es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7)
|
es_similarity_threshold = db.Column(db.Float, nullable=True, default=0.7)
|
||||||
@@ -53,6 +54,8 @@ class Tenant(db.Model):
|
|||||||
license_end_date = db.Column(db.Date, nullable=True)
|
license_end_date = db.Column(db.Date, nullable=True)
|
||||||
allowed_monthly_interactions = db.Column(db.Integer, nullable=True)
|
allowed_monthly_interactions = db.Column(db.Integer, nullable=True)
|
||||||
encrypted_chat_api_key = db.Column(db.String(500), nullable=True)
|
encrypted_chat_api_key = db.Column(db.String(500), nullable=True)
|
||||||
|
encrypted_api_key = db.Column(db.String(500), nullable=True)
|
||||||
|
|
||||||
|
|
||||||
# Tuning enablers
|
# Tuning enablers
|
||||||
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
|
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
|
||||||
@@ -80,6 +83,7 @@ class Tenant(db.Model):
|
|||||||
'html_end_tags': self.html_end_tags,
|
'html_end_tags': self.html_end_tags,
|
||||||
'html_included_elements': self.html_included_elements,
|
'html_included_elements': self.html_included_elements,
|
||||||
'html_excluded_elements': self.html_excluded_elements,
|
'html_excluded_elements': self.html_excluded_elements,
|
||||||
|
'html_excluded_classes': self.html_excluded_classes,
|
||||||
'min_chunk_size': self.min_chunk_size,
|
'min_chunk_size': self.min_chunk_size,
|
||||||
'max_chunk_size': self.max_chunk_size,
|
'max_chunk_size': self.max_chunk_size,
|
||||||
'es_k': self.es_k,
|
'es_k': self.es_k,
|
||||||
|
|||||||
230
common/utils/document_utils.py
Normal file
230
common/utils/document_utils.py
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
from datetime import datetime as dt, timezone as tz
|
||||||
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
|
from werkzeug.utils import secure_filename
|
||||||
|
from common.models.document import Document, DocumentVersion
|
||||||
|
from common.extensions import db, minio_client
|
||||||
|
from common.utils.celery_utils import current_celery
|
||||||
|
from flask import current_app
|
||||||
|
import requests
|
||||||
|
from urllib.parse import urlparse, unquote
|
||||||
|
import os
|
||||||
|
from .eveai_exceptions import EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType, \
|
||||||
|
EveAIYoutubeError
|
||||||
|
|
||||||
|
|
||||||
|
def create_document_stack(api_input, file, filename, extension, tenant_id):
    """Create a Document with its first DocumentVersion and store the file.

    Args:
        api_input: Mapping with the document fields. ``name`` and
            ``valid_from`` are read by ``create_document``; ``url``,
            ``language`` (default ``'en'``) and ``user_context`` are optional.
        file: File content handed to the storage client.
        filename: File name, used as fallback for the document name.
        extension: File type stored on the document version.
        tenant_id: Tenant the document belongs to.

    Returns:
        Tuple ``(new_doc, new_doc_vers)``.

    Raises:
        SQLAlchemyError: If committing the new rows fails (session is rolled back).
        EveAIInvalidLanguageException: If an empty language is supplied.
    """
    # Build both objects before touching the session, so a validation error
    # while creating the version does not leave a pending Document behind.
    new_doc = create_document(api_input, filename, tenant_id)
    new_doc_vers = create_version_for_document(
        new_doc,
        api_input.get('url', ''),
        api_input.get('language', 'en'),
        api_input.get('user_context', ''),
    )

    db.session.add(new_doc)
    db.session.add(new_doc_vers)

    try:
        db.session.commit()
    except SQLAlchemyError as e:
        current_app.logger.error(f'Error adding document for tenant {tenant_id}: {e}')
        db.session.rollback()
        raise

    # Log both identifiers; the original message labelled the document id as
    # the document version id.
    current_app.logger.info(f'Document added successfully for tenant {tenant_id}, '
                            f'Document {new_doc.id}, Document Version {new_doc_vers.id}')

    # Upload file to storage
    upload_file_for_version(new_doc_vers, file, extension, tenant_id)

    return new_doc, new_doc_vers
|
||||||
|
|
||||||
|
|
||||||
|
def create_document(form, filename, tenant_id):
    """Build (but do not persist) a Document from the submitted fields.

    The document name falls back to ``filename`` without its last extension
    when ``form['name']`` is the empty string. ``valid_from`` falls back to
    the current UTC time when no value is supplied.

    Args:
        form: Mapping providing the ``name`` and ``valid_from`` keys.
        filename: File name used to derive a default document name.
        tenant_id: Tenant the document belongs to.

    Returns:
        The new, unsaved Document.
    """
    document = Document()

    requested_name = form['name']
    document.name = filename.rsplit('.', 1)[0] if requested_name == '' else requested_name

    requested_start = form['valid_from']
    if requested_start and requested_start != '':
        document.valid_from = requested_start
    else:
        document.valid_from = dt.now(tz.utc)

    document.tenant_id = tenant_id
    set_logging_information(document, dt.now(tz.utc))

    return document
|
||||||
|
|
||||||
|
|
||||||
|
def create_version_for_document(document, url, language, user_context):
    """Build (but do not persist) a DocumentVersion attached to ``document``.

    Empty-string ``url`` and ``user_context`` values are left unset on the
    version.

    Args:
        document: The Document the version belongs to.
        url: Source URL, or ``''`` when there is none.
        language: Language of the version; must not be ``''``.
        user_context: Extra context supplied by the user, or ``''``.

    Returns:
        The new, unsaved DocumentVersion.

    Raises:
        EveAIInvalidLanguageException: If ``language`` is the empty string.
    """
    version = DocumentVersion()

    if url != '':
        version.url = url

    # Guard clause: a version without a language is never valid.
    if language == '':
        raise EveAIInvalidLanguageException('Language is required for document creation!')
    version.language = language

    if user_context != '':
        version.user_context = user_context

    version.document = document
    set_logging_information(version, dt.now(tz.utc))

    return version
|
||||||
|
|
||||||
|
|
||||||
|
def upload_file_for_version(doc_vers, file, extension, tenant_id):
    """Store ``file`` for a document version and commit its file metadata.

    Sets ``file_type``, ``file_name`` and ``file_location`` on ``doc_vers``,
    makes sure the tenant bucket exists, uploads the file and commits the
    session. On any failure the session is rolled back, the error is logged
    and the exception is re-raised.

    Args:
        doc_vers: The DocumentVersion the file belongs to.
        file: File content handed to the storage client.
        extension: File type to record on the version.
        tenant_id: Tenant owning the bucket.
    """
    doc_vers.file_type = extension
    doc_vers.file_name = doc_vers.calc_file_name()
    doc_vers.file_location = doc_vers.calc_file_location()

    # The tenant bucket normally exists already; creating it here covers the
    # case where a migration took place.
    minio_client.create_tenant_bucket(tenant_id)

    try:
        upload_args = (
            tenant_id,
            doc_vers.doc_id,
            doc_vers.language,
            doc_vers.id,
            doc_vers.file_name,
            file,
        )
        minio_client.upload_document_file(*upload_args)
        db.session.commit()
        current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
                                f'document version {doc_vers.id} while uploading file.')
    except Exception as e:
        db.session.rollback()
        current_app.logger.error(
            f'Error saving document to MinIO for tenant {tenant_id}: {e}')
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def set_logging_information(obj, timestamp):
    """Stamp a new object: set both ``created_at`` and ``updated_at`` to ``timestamp``."""
    for field in ('created_at', 'updated_at'):
        setattr(obj, field, timestamp)
|
||||||
|
|
||||||
|
|
||||||
|
def update_logging_information(obj, timestamp):
    """Stamp a modified object: set ``updated_at`` to ``timestamp``, nothing else."""
    setattr(obj, 'updated_at', timestamp)
|
||||||
|
|
||||||
|
|
||||||
|
def get_extension_from_content_type(content_type):
    """Map an HTTP ``Content-Type`` value to a file extension.

    The media type is matched case-insensitively, and any parameters
    (e.g. ``; charset=utf-8``) and surrounding whitespace are ignored.

    Args:
        content_type: The Content-Type header value; may be empty or ``None``.

    Returns:
        The matching extension without a leading dot, or ``'html'`` when the
        type is unknown or missing.
    """
    content_type_map = {
        'text/html': 'html',
        'application/pdf': 'pdf',
        'text/plain': 'txt',
        'text/markdown': 'md',
        'application/msword': 'doc',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        # Media and subtitle types, so URLs pointing at these files are not
        # mislabelled as HTML.
        'audio/mpeg': 'mp3',
        'video/mp4': 'mp4',
        'audio/ogg': 'ogg',
        'application/ogg': 'ogg',
        'application/x-subrip': 'srt',
        # Add more mappings as needed
    }
    # Media types are case-insensitive; drop parameters and stray whitespace.
    media_type = (content_type or '').split(';', 1)[0].strip().lower()
    return content_type_map.get(media_type, 'html')  # Default to 'html' if unknown
|
||||||
|
|
||||||
|
|
||||||
|
def process_url(url, tenant_id, timeout=30):
    """Download the content behind ``url`` and derive a filename and extension.

    A HEAD request determines the content type, which in turn determines the
    extension. The filename is taken from the URL path; when the path has no
    usable file name, the last path segment (or ``'document'``) plus the
    detected extension is used.

    Args:
        url: The URL to fetch.
        tenant_id: Tenant the request is made for (not used by this function).
        timeout: Seconds to wait for each HTTP request. Without a timeout a
            stalled server would block the caller indefinitely.

    Returns:
        Tuple ``(file_content, filename, extension)``.

    Raises:
        EveAIDoubleURLException: If a DocumentVersion with this URL already exists.
        requests.RequestException: If a request fails, times out, or the
            download returns an error status.
    """
    response = requests.head(url, allow_redirects=True, timeout=timeout)
    content_type = response.headers.get('Content-Type', '').split(';')[0]

    # Determine file extension based on Content-Type
    extension = get_extension_from_content_type(content_type)

    # Generate filename
    parsed_url = urlparse(url)
    path = unquote(parsed_url.path)
    filename = os.path.basename(path)

    if not filename or '.' not in filename:
        # Use the last part of the path or a default name
        filename = path.strip('/').split('/')[-1] or 'document'
        filename = secure_filename(f"{filename}.{extension}")
    else:
        filename = secure_filename(filename)

    # Check if a document with this URL already exists
    existing_doc = DocumentVersion.query.filter_by(url=url).first()
    if existing_doc:
        raise EveAIDoubleURLException

    # Download the content
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    file_content = response.content

    return file_content, filename, extension
|
||||||
|
|
||||||
|
|
||||||
|
def process_multiple_urls(urls, tenant_id, api_input):
    """Create a document and start an embedding task for each URL.

    A failure for one URL is logged and reported in its result entry; the
    remaining URLs are still processed.

    Args:
        urls: Iterable of URLs to import.
        tenant_id: Tenant the documents belong to.
        api_input: Mapping with the shared document fields; ``name`` is used
            as a prefix for each document name when it is non-empty.

    Returns:
        One dict per URL. Successful entries carry ``url``, ``document_id``,
        ``document_version_id``, ``task_id`` and ``status`` ``'success'``;
        failed entries carry ``url``, ``status`` ``'error'`` and ``message``.
    """
    outcomes = []
    for url in urls:
        try:
            content, filename, extension = process_url(url, tenant_id)

            # Per-URL copy of the shared input with its own url and name.
            if api_input['name']:
                doc_name = f"{api_input['name']}-{filename}"
            else:
                doc_name = filename
            overrides = {'url': url, 'name': doc_name}
            url_input = api_input.copy()
            url_input.update(overrides)

            document, version = create_document_stack(url_input, content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, version.id)

            success_entry = {
                'url': url,
                'document_id': document.id,
                'document_version_id': version.id,
                'task_id': task_id,
                'status': 'success'
            }
            outcomes.append(success_entry)
        except Exception as e:
            current_app.logger.error(f"Error processing URL {url}: {str(e)}")
            failure_entry = {
                'url': url,
                'status': 'error',
                'message': str(e)
            }
            outcomes.append(failure_entry)
    return outcomes
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_youtube_document(url, tenant_id, api_input):
    """Create and commit a placeholder Document/DocumentVersion for a YouTube URL.

    No file is uploaded; only the file metadata is set on the version.

    Args:
        url: The YouTube URL stored on the document version.
        tenant_id: Tenant the document belongs to.
        api_input: Mapping providing ``language`` and ``user_context``, plus
            the fields read by ``create_document``.

    Returns:
        Tuple ``(new_doc, new_doc_vers)``.

    Raises:
        EveAIYoutubeError: If any step fails. The session is rolled back and
            the original exception is chained as the cause.
    """
    try:
        filename = "placeholder.youtube"
        extension = 'youtube'

        new_doc = create_document(api_input, filename, tenant_id)
        new_doc_vers = create_version_for_document(new_doc, url, api_input['language'], api_input['user_context'])

        # NOTE(review): file name/location are calculated before the commit,
        # i.e. possibly before database ids exist - confirm calc_file_name()
        # and calc_file_location() do not depend on them.
        new_doc_vers.file_type = extension
        new_doc_vers.file_name = new_doc_vers.calc_file_name()
        new_doc_vers.file_location = new_doc_vers.calc_file_location()

        db.session.add(new_doc)
        db.session.add(new_doc_vers)
        db.session.commit()

        return new_doc, new_doc_vers
    except Exception as e:
        # Leave the session usable for the caller, matching the other
        # helpers in this module.
        db.session.rollback()
        raise EveAIYoutubeError(f"Error preparing YouTube document: {str(e)}") from e
|
||||||
|
|
||||||
|
|
||||||
|
def start_embedding_task(tenant_id, doc_vers_id):
    """Queue the ``create_embeddings`` task for a document version.

    Args:
        tenant_id: Tenant the document version belongs to.
        doc_vers_id: Id of the document version to embed.

    Returns:
        The id of the task that was sent.
    """
    task_args = [tenant_id, doc_vers_id]
    task = current_celery.send_task('create_embeddings', queue='embeddings', args=task_args)
    current_app.logger.info(f'Embedding creation started for tenant {tenant_id}, '
                            f'Document Version {doc_vers_id}. '
                            f'Embedding creation task: {task.id}')
    return task.id
|
||||||
|
|
||||||
|
|
||||||
|
def validate_file_type(extension):
    """Check ``extension`` against the ``SUPPORTED_FILE_TYPES`` app setting.

    Args:
        extension: File extension without a leading dot.

    Raises:
        EveAIUnsupportedFileType: If the extension is not in the configured list.
    """
    log = current_app.logger
    log.debug(f'Validating file type {extension}')
    log.debug(f'Supported file types: {current_app.config["SUPPORTED_FILE_TYPES"]}')

    if extension in current_app.config['SUPPORTED_FILE_TYPES']:
        return

    raise EveAIUnsupportedFileType(f"Filetype {extension} is currently not supported. "
                                   f"Supported filetypes: {', '.join(current_app.config['SUPPORTED_FILE_TYPES'])}")
|
||||||
43
common/utils/eveai_exceptions.py
Normal file
43
common/utils/eveai_exceptions.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
class EveAIException(Exception):
    """Base exception class for EveAI API.

    Carries a human-readable message, the HTTP status code to answer with,
    and an optional payload mapping merged into the serialised error.
    """

    def __init__(self, message, status_code=400, payload=None):
        # Pass the message on to Exception so that str(exc) and tracebacks
        # show it; without this, str(exc) is an empty string.
        super().__init__(message)
        self.message = message
        self.status_code = status_code
        self.payload = payload

    def to_dict(self):
        """Return the payload (if any) as a dict with ``message`` added."""
        rv = dict(self.payload or ())
        rv['message'] = self.message
        return rv
|
||||||
|
|
||||||
|
|
||||||
|
class EveAIInvalidLanguageException(EveAIException):
    """Raised when an invalid language is provided"""

    # Default message previously read "Langage is required" (typo).
    def __init__(self, message="Language is required", status_code=400, payload=None):
        super().__init__(message, status_code, payload)
|
||||||
|
|
||||||
|
|
||||||
|
class EveAIDoubleURLException(EveAIException):
    """Signals that the supplied URL is already known."""

    def __init__(self, message="URL already exists", status_code=400, payload=None):
        super().__init__(message=message, status_code=status_code, payload=payload)
|
||||||
|
|
||||||
|
|
||||||
|
class EveAIUnsupportedFileType(EveAIException):
    """Signals that the supplied file type is not accepted."""

    def __init__(self, message="Filetype is not supported", status_code=400, payload=None):
        super().__init__(message=message, status_code=status_code, payload=payload)
|
||||||
|
|
||||||
|
|
||||||
|
class EveAIYoutubeError(EveAIException):
    """Signals that a Youtube document could not be added."""

    def __init__(self, message="Youtube document creation failed", status_code=400, payload=None):
        super().__init__(message=message, status_code=status_code, payload=payload)
|
||||||
|
|
||||||
|
# Add more custom exceptions as needed
|
||||||
@@ -86,6 +86,7 @@ def select_model_variables(tenant):
|
|||||||
model_variables['html_end_tags'] = tenant.html_end_tags
|
model_variables['html_end_tags'] = tenant.html_end_tags
|
||||||
model_variables['html_included_elements'] = tenant.html_included_elements
|
model_variables['html_included_elements'] = tenant.html_included_elements
|
||||||
model_variables['html_excluded_elements'] = tenant.html_excluded_elements
|
model_variables['html_excluded_elements'] = tenant.html_excluded_elements
|
||||||
|
model_variables['html_excluded_classes'] = tenant.html_excluded_classes
|
||||||
|
|
||||||
# Set Chunk Size variables
|
# Set Chunk Size variables
|
||||||
model_variables['min_chunk_size'] = tenant.min_chunk_size
|
model_variables['min_chunk_size'] = tenant.min_chunk_size
|
||||||
@@ -144,8 +145,12 @@ def select_model_variables(tenant):
|
|||||||
default_headers=portkey_headers)
|
default_headers=portkey_headers)
|
||||||
tool_calling_supported = False
|
tool_calling_supported = False
|
||||||
match llm_model:
|
match llm_model:
|
||||||
case 'gpt-4-turbo' | 'gpt-4o' | 'gpt-4o-mini':
|
case 'gpt-4o' | 'gpt-4o-mini':
|
||||||
tool_calling_supported = True
|
tool_calling_supported = True
|
||||||
|
PDF_chunk_size = 10000
|
||||||
|
PDF_chunk_overlap = 200
|
||||||
|
PDF_min_chunk_size = 8000
|
||||||
|
PDF_max_chunk_size = 12000
|
||||||
case _:
|
case _:
|
||||||
raise Exception(f'Error setting model variables for tenant {tenant.id} '
|
raise Exception(f'Error setting model variables for tenant {tenant.id} '
|
||||||
f'error: Invalid chat model')
|
f'error: Invalid chat model')
|
||||||
@@ -160,10 +165,19 @@ def select_model_variables(tenant):
|
|||||||
model=llm_model_ext,
|
model=llm_model_ext,
|
||||||
temperature=model_variables['RAG_temperature'])
|
temperature=model_variables['RAG_temperature'])
|
||||||
tool_calling_supported = True
|
tool_calling_supported = True
|
||||||
|
PDF_chunk_size = 10000
|
||||||
|
PDF_chunk_overlap = 200
|
||||||
|
PDF_min_chunk_size = 8000
|
||||||
|
PDF_max_chunk_size = 12000
|
||||||
case _:
|
case _:
|
||||||
raise Exception(f'Error setting model variables for tenant {tenant.id} '
|
raise Exception(f'Error setting model variables for tenant {tenant.id} '
|
||||||
f'error: Invalid chat provider')
|
f'error: Invalid chat provider')
|
||||||
|
|
||||||
|
model_variables['PDF_chunk_size'] = PDF_chunk_size
|
||||||
|
model_variables['PDF_chunk_overlap'] = PDF_chunk_overlap
|
||||||
|
model_variables['PDF_min_chunk_size'] = PDF_min_chunk_size
|
||||||
|
model_variables['PDF_max_chunk_size'] = PDF_max_chunk_size
|
||||||
|
|
||||||
if tool_calling_supported:
|
if tool_calling_supported:
|
||||||
model_variables['cited_answer_cls'] = CitedAnswer
|
model_variables['cited_answer_cls'] = CitedAnswer
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ from datetime import timedelta
|
|||||||
import redis
|
import redis
|
||||||
|
|
||||||
from common.utils.prompt_loader import load_prompt_templates
|
from common.utils.prompt_loader import load_prompt_templates
|
||||||
from eveai_app.views.security_forms import ResetPasswordForm
|
|
||||||
|
|
||||||
basedir = path.abspath(path.dirname(__file__))
|
basedir = path.abspath(path.dirname(__file__))
|
||||||
|
|
||||||
@@ -46,7 +45,6 @@ class Config(object):
|
|||||||
SECURITY_EMAIL_SUBJECT_PASSWORD_NOTICE = 'Your Password Has Been Reset'
|
SECURITY_EMAIL_SUBJECT_PASSWORD_NOTICE = 'Your Password Has Been Reset'
|
||||||
SECURITY_EMAIL_PLAINTEXT = False
|
SECURITY_EMAIL_PLAINTEXT = False
|
||||||
SECURITY_EMAIL_HTML = True
|
SECURITY_EMAIL_HTML = True
|
||||||
SECURITY_RESET_PASSWORD_FORM = ResetPasswordForm
|
|
||||||
|
|
||||||
# Ensure Flask-Security-Too is handling CSRF tokens when behind a proxy
|
# Ensure Flask-Security-Too is handling CSRF tokens when behind a proxy
|
||||||
SECURITY_CSRF_PROTECT_MECHANISMS = ['session']
|
SECURITY_CSRF_PROTECT_MECHANISMS = ['session']
|
||||||
@@ -55,7 +53,7 @@ class Config(object):
|
|||||||
WTF_CSRF_CHECK_DEFAULT = False
|
WTF_CSRF_CHECK_DEFAULT = False
|
||||||
|
|
||||||
# file upload settings
|
# file upload settings
|
||||||
MAX_CONTENT_LENGTH = 16 * 1024 * 1024
|
MAX_CONTENT_LENGTH = 50 * 1024 * 1024
|
||||||
UPLOAD_EXTENSIONS = ['.txt', '.pdf', '.png', '.jpg', '.jpeg', '.gif']
|
UPLOAD_EXTENSIONS = ['.txt', '.pdf', '.png', '.jpg', '.jpeg', '.gif']
|
||||||
|
|
||||||
# supported languages
|
# supported languages
|
||||||
@@ -138,6 +136,10 @@ class Config(object):
|
|||||||
MAIL_PASSWORD = environ.get('MAIL_PASSWORD')
|
MAIL_PASSWORD = environ.get('MAIL_PASSWORD')
|
||||||
MAIL_DEFAULT_SENDER = ('eveAI Admin', MAIL_USERNAME)
|
MAIL_DEFAULT_SENDER = ('eveAI Admin', MAIL_USERNAME)
|
||||||
|
|
||||||
|
SUPPORTED_FILE_TYPES = ['pdf', 'html', 'md', 'txt', 'mp3', 'mp4', 'ogg', 'srt']
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DevConfig(Config):
|
class DevConfig(Config):
|
||||||
DEVELOPMENT = True
|
DEVELOPMENT = True
|
||||||
|
|||||||
@@ -60,6 +60,14 @@ LOGGING = {
|
|||||||
'backupCount': 10,
|
'backupCount': 10,
|
||||||
'formatter': 'standard',
|
'formatter': 'standard',
|
||||||
},
|
},
|
||||||
|
'file_api': {
|
||||||
|
'level': 'DEBUG',
|
||||||
|
'class': 'logging.handlers.RotatingFileHandler',
|
||||||
|
'filename': 'logs/eveai_api.log',
|
||||||
|
'maxBytes': 1024 * 1024 * 5, # 5MB
|
||||||
|
'backupCount': 10,
|
||||||
|
'formatter': 'standard',
|
||||||
|
},
|
||||||
'file_sqlalchemy': {
|
'file_sqlalchemy': {
|
||||||
'level': 'DEBUG',
|
'level': 'DEBUG',
|
||||||
'class': 'logging.handlers.RotatingFileHandler',
|
'class': 'logging.handlers.RotatingFileHandler',
|
||||||
@@ -146,6 +154,11 @@ LOGGING = {
|
|||||||
'level': 'DEBUG',
|
'level': 'DEBUG',
|
||||||
'propagate': False
|
'propagate': False
|
||||||
},
|
},
|
||||||
|
'eveai_api': { # logger for the eveai_api
|
||||||
|
'handlers': ['file_api', 'graylog', ] if env == 'production' else ['file_api', ],
|
||||||
|
'level': 'DEBUG',
|
||||||
|
'propagate': False
|
||||||
|
},
|
||||||
'sqlalchemy.engine': { # logger for the sqlalchemy
|
'sqlalchemy.engine': { # logger for the sqlalchemy
|
||||||
'handlers': ['file_sqlalchemy', 'graylog', ] if env == 'production' else ['file_sqlalchemy', ],
|
'handlers': ['file_sqlalchemy', 'graylog', ] if env == 'production' else ['file_sqlalchemy', ],
|
||||||
'level': 'DEBUG',
|
'level': 'DEBUG',
|
||||||
|
|||||||
@@ -15,11 +15,12 @@ html_parse: |
|
|||||||
|
|
||||||
pdf_parse: |
|
pdf_parse: |
|
||||||
You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
|
You are a top administrative aid specialized in transforming given PDF-files into markdown formatted files. The generated files will be used to generate embeddings in a RAG-system.
|
||||||
|
The content you get is already processed (some markdown already generated), but needs to be corrected. For large files, you may receive only portions of the full file. Consider this when processing the content.
|
||||||
|
|
||||||
# Best practices are:
|
# Best practices are:
|
||||||
- Respect wordings and language(s) used in the PDF.
|
- Respect wordings and language(s) used in the provided content.
|
||||||
- The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
|
- The following items need to be considered: headings, paragraphs, listed items (numbered or not) and tables. Images can be neglected.
|
||||||
- When headings are numbered, show the numbering and define the header level.
|
- When headings are numbered, show the numbering and define the header level. You may have to correct current header levels, as preprocessing is known to make errors.
|
||||||
- A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
|
- A new item is started when a <return> is found before a full line is reached. In order to know the number of characters in a line, please check the document and the context within the document (e.g. an image could limit the number of characters temporarily).
|
||||||
- Paragraphs are to be stripped of newlines so they become easily readable.
|
- Paragraphs are to be stripped of newlines so they become easily readable.
|
||||||
- Be careful of encoding of the text. Everything needs to be human readable.
|
- Be careful of encoding of the text. Everything needs to be human readable.
|
||||||
|
|||||||
@@ -57,6 +57,9 @@ services:
|
|||||||
- ../nginx/sites-enabled:/etc/nginx/sites-enabled
|
- ../nginx/sites-enabled:/etc/nginx/sites-enabled
|
||||||
- ../nginx/static:/etc/nginx/static
|
- ../nginx/static:/etc/nginx/static
|
||||||
- ../nginx/public:/etc/nginx/public
|
- ../nginx/public:/etc/nginx/public
|
||||||
|
- ../integrations/Wordpress/eveai-chat-widget/css/eveai-chat-style.css:/etc/nginx/static/css/eveai-chat-style.css
|
||||||
|
- ../integrations/Wordpress/eveai-chat-widget/js/eveai-chat-widget.js:/etc/nginx/static/js/eveai-chat-widget.js
|
||||||
|
- ../integrations/Wordpress/eveai-chat-widget/js/eveai-sdk.js:/etc/nginx/static/js/eveai-sdk.js
|
||||||
- ./logs/nginx:/var/log/nginx
|
- ./logs/nginx:/var/log/nginx
|
||||||
depends_on:
|
depends_on:
|
||||||
- eveai_app
|
- eveai_app
|
||||||
|
|||||||
@@ -10,6 +10,9 @@ COPY ../../nginx/mime.types /etc/nginx/mime.types
|
|||||||
# Copy static & public files
|
# Copy static & public files
|
||||||
RUN mkdir -p /etc/nginx/static /etc/nginx/public
|
RUN mkdir -p /etc/nginx/static /etc/nginx/public
|
||||||
COPY ../../nginx/static /etc/nginx/static
|
COPY ../../nginx/static /etc/nginx/static
|
||||||
|
COPY ../../integrations/Wordpress/eveai-chat-widget/css/eveai-chat-style.css /etc/nginx/static/css/
|
||||||
|
COPY ../../integrations/Wordpress/eveai-chat-widget/js/eveai-chat-widget.js /etc/nginx/static/js/
|
||||||
|
COPY ../../integrations/Wordpress/eveai-chat-widget/js/eveai-sdk.js /etc/nginx/static/js
|
||||||
COPY ../../nginx/public /etc/nginx/public
|
COPY ../../nginx/public /etc/nginx/public
|
||||||
|
|
||||||
# Copy site-specific configurations
|
# Copy site-specific configurations
|
||||||
|
|||||||
@@ -1,4 +1,72 @@
|
|||||||
# from flask import Blueprint, request
|
from flask import Flask, jsonify
|
||||||
#
|
from flask_jwt_extended import get_jwt_identity
|
||||||
# public_api_bp = Blueprint("public", __name__, url_prefix="/api/v1")
|
from common.extensions import db, api, jwt, minio_client
|
||||||
# tenant_api_bp = Blueprint("tenant", __name__, url_prefix="/api/v1/tenant")
|
import os
|
||||||
|
import logging.config
|
||||||
|
|
||||||
|
from common.utils.database import Database
|
||||||
|
from config.logging_config import LOGGING
|
||||||
|
from .api.document_api import AddDocumentResource
|
||||||
|
from .api.auth import TokenResource
|
||||||
|
from config.config import get_config
|
||||||
|
from common.utils.celery_utils import make_celery, init_celery
|
||||||
|
from common.utils.eveai_exceptions import EveAIException
|
||||||
|
|
||||||
|
|
||||||
|
def create_app(config_file=None):
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
environment = os.getenv('FLASK_ENV', 'development')
|
||||||
|
|
||||||
|
match environment:
|
||||||
|
case 'development':
|
||||||
|
app.config.from_object(get_config('dev'))
|
||||||
|
case 'production':
|
||||||
|
app.config.from_object(get_config('prod'))
|
||||||
|
case _:
|
||||||
|
app.config.from_object(get_config('dev'))
|
||||||
|
|
||||||
|
app.config['SESSION_KEY_PREFIX'] = 'eveai_api_'
|
||||||
|
|
||||||
|
app.celery = make_celery(app.name, app.config)
|
||||||
|
init_celery(app.celery, app)
|
||||||
|
|
||||||
|
logging.config.dictConfig(LOGGING)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
logger.info("eveai_api starting up")
|
||||||
|
|
||||||
|
# Register Necessary Extensions
|
||||||
|
register_extensions(app)
|
||||||
|
|
||||||
|
# Error handler for the API
|
||||||
|
@app.errorhandler(EveAIException)
|
||||||
|
def handle_eveai_exception(error):
|
||||||
|
response = jsonify(error.to_dict())
|
||||||
|
response.status_code = error.status_code
|
||||||
|
return response
|
||||||
|
|
||||||
|
@api.before_request
|
||||||
|
def before_request():
|
||||||
|
# Extract tenant_id from the JWT token
|
||||||
|
tenant_id = get_jwt_identity()
|
||||||
|
|
||||||
|
# Switch to the correct schema
|
||||||
|
Database(tenant_id).switch_schema()
|
||||||
|
|
||||||
|
# Register resources
|
||||||
|
register_api_resources()
|
||||||
|
|
||||||
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
def register_extensions(app):
|
||||||
|
db.init_app(app)
|
||||||
|
api.init_app(app)
|
||||||
|
jwt.init_app(app)
|
||||||
|
minio_client.init_app(app)
|
||||||
|
|
||||||
|
|
||||||
|
def register_api_resources():
|
||||||
|
api.add_resource(AddDocumentResource, '/api/v1/documents/add_document')
|
||||||
|
api.add_resource(TokenResource, '/api/v1/token')
|
||||||
|
|||||||
24
eveai_api/api/auth.py
Normal file
24
eveai_api/api/auth.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
from flask_restful import Resource, reqparse
|
||||||
|
from flask_jwt_extended import create_access_token
|
||||||
|
from common.models.user import Tenant
|
||||||
|
from common.extensions import simple_encryption
|
||||||
|
from flask import current_app
|
||||||
|
|
||||||
|
|
||||||
|
class TokenResource(Resource):
|
||||||
|
def post(self):
|
||||||
|
parser = reqparse.RequestParser()
|
||||||
|
parser.add_argument('tenant_id', type=int, required=True)
|
||||||
|
parser.add_argument('api_key', type=str, required=True)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
tenant = Tenant.query.get(args['tenant_id'])
|
||||||
|
if not tenant:
|
||||||
|
return {'message': 'Tenant not found'}, 404
|
||||||
|
|
||||||
|
decrypted_api_key = simple_encryption.decrypt_api_key(tenant.encrypted_api_key)
|
||||||
|
if args['api_key'] != decrypted_api_key:
|
||||||
|
return {'message': 'Invalid API key'}, 401
|
||||||
|
|
||||||
|
access_token = create_access_token(identity={'tenant_id': tenant.id})
|
||||||
|
return {'access_token': access_token}, 200
|
||||||
178
eveai_api/api/document_api.py
Normal file
178
eveai_api/api/document_api.py
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
from flask_restful import Resource, reqparse
|
||||||
|
from flask import current_app
|
||||||
|
from flask_jwt_extended import jwt_required, get_jwt_identity
|
||||||
|
from werkzeug.datastructures import FileStorage
|
||||||
|
from werkzeug.utils import secure_filename
|
||||||
|
from common.utils.document_utils import (
|
||||||
|
create_document_stack, process_url, start_embedding_task,
|
||||||
|
validate_file_type, EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
|
||||||
|
process_multiple_urls, prepare_youtube_document
|
||||||
|
)
|
||||||
|
from common.utils.eveai_exceptions import EveAIYoutubeError
|
||||||
|
|
||||||
|
|
||||||
|
class AddDocumentResource(Resource):
|
||||||
|
@jwt_required()
|
||||||
|
def post(self):
|
||||||
|
parser = reqparse.RequestParser()
|
||||||
|
parser.add_argument('file', type=FileStorage, location='files', required=True)
|
||||||
|
parser.add_argument('name', type=str, required=False)
|
||||||
|
parser.add_argument('language', type=str, required=True)
|
||||||
|
parser.add_argument('user_context', type=str, required=False)
|
||||||
|
parser.add_argument('valid_from', type=str, required=False)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
tenant_id = get_jwt_identity()
|
||||||
|
current_app.logger.info(f'Adding document for tenant {tenant_id}')
|
||||||
|
|
||||||
|
try:
|
||||||
|
file = args['file']
|
||||||
|
filename = secure_filename(file.filename)
|
||||||
|
extension = filename.rsplit('.', 1)[1].lower()
|
||||||
|
|
||||||
|
validate_file_type(extension)
|
||||||
|
|
||||||
|
api_input = {
|
||||||
|
'name': args['name'] or filename,
|
||||||
|
'language': args['language'],
|
||||||
|
'user_context': args['user_context'],
|
||||||
|
'valid_from': args['valid_from']
|
||||||
|
}
|
||||||
|
|
||||||
|
new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
|
||||||
|
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
|
||||||
|
'document_id': new_doc.id,
|
||||||
|
'document_version_id': new_doc_vers.id,
|
||||||
|
'task_id': task_id
|
||||||
|
}, 201
|
||||||
|
|
||||||
|
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
|
||||||
|
return {'message': str(e)}, 400
|
||||||
|
except Exception as e:
|
||||||
|
current_app.logger.error(f'Error adding document: {str(e)}')
|
||||||
|
return {'message': 'Error adding document'}, 500
|
||||||
|
|
||||||
|
|
||||||
|
class AddURLResource(Resource):
|
||||||
|
@jwt_required()
|
||||||
|
def post(self):
|
||||||
|
parser = reqparse.RequestParser()
|
||||||
|
parser.add_argument('url', type=str, required=True)
|
||||||
|
parser.add_argument('name', type=str, required=False)
|
||||||
|
parser.add_argument('language', type=str, required=True)
|
||||||
|
parser.add_argument('user_context', type=str, required=False)
|
||||||
|
parser.add_argument('valid_from', type=str, required=False)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
tenant_id = get_jwt_identity()
|
||||||
|
current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')
|
||||||
|
|
||||||
|
try:
|
||||||
|
file_content, filename, extension = process_url(args['url'], tenant_id)
|
||||||
|
|
||||||
|
api_input = {
|
||||||
|
'url': args['url'],
|
||||||
|
'name': args['name'] or filename,
|
||||||
|
'language': args['language'],
|
||||||
|
'user_context': args['user_context'],
|
||||||
|
'valid_from': args['valid_from']
|
||||||
|
}
|
||||||
|
|
||||||
|
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
|
||||||
|
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
|
||||||
|
'document_id': new_doc.id,
|
||||||
|
'document_version_id': new_doc_vers.id,
|
||||||
|
'task_id': task_id
|
||||||
|
}, 201
|
||||||
|
|
||||||
|
except EveAIDoubleURLException:
|
||||||
|
return {'message': f'A document with URL {args["url"]} already exists.'}, 400
|
||||||
|
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
|
||||||
|
return {'message': str(e)}, 400
|
||||||
|
except Exception as e:
|
||||||
|
current_app.logger.error(f'Error adding document from URL: {str(e)}')
|
||||||
|
return {'message': 'Error adding document from URL'}, 500
|
||||||
|
|
||||||
|
|
||||||
|
class AddMultipleURLsResource(Resource):
|
||||||
|
@jwt_required()
|
||||||
|
def post(self):
|
||||||
|
parser = reqparse.RequestParser()
|
||||||
|
parser.add_argument('urls', type=str, action='append', required=True)
|
||||||
|
parser.add_argument('name', type=str, required=False)
|
||||||
|
parser.add_argument('language', type=str, required=True)
|
||||||
|
parser.add_argument('user_context', type=str, required=False)
|
||||||
|
parser.add_argument('valid_from', type=str, required=False)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
tenant_id = get_jwt_identity()
|
||||||
|
current_app.logger.info(f'Adding multiple documents from URLs for tenant {tenant_id}')
|
||||||
|
|
||||||
|
try:
|
||||||
|
api_input = {
|
||||||
|
'name': args['name'],
|
||||||
|
'language': args['language'],
|
||||||
|
'user_context': args['user_context'],
|
||||||
|
'valid_from': args['valid_from']
|
||||||
|
}
|
||||||
|
|
||||||
|
results = process_multiple_urls(args['urls'], tenant_id, api_input)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'message': 'Processing of multiple URLs completed',
|
||||||
|
'results': results
|
||||||
|
}, 201
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
current_app.logger.error(f'Error adding documents from URLs: {str(e)}')
|
||||||
|
return {'message': 'Error adding documents from URLs'}, 500
|
||||||
|
|
||||||
|
|
||||||
|
class AddYoutubeResource(Resource):
|
||||||
|
@jwt_required()
|
||||||
|
def post(self):
|
||||||
|
parser = reqparse.RequestParser()
|
||||||
|
parser.add_argument('url', type=str, required=True)
|
||||||
|
parser.add_argument('name', type=str, required=False)
|
||||||
|
parser.add_argument('language', type=str, required=True)
|
||||||
|
parser.add_argument('user_context', type=str, required=False)
|
||||||
|
parser.add_argument('valid_from', type=str, required=False)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
tenant_id = get_jwt_identity()
|
||||||
|
current_app.logger.info(f'Adding YouTube document for tenant {tenant_id}')
|
||||||
|
|
||||||
|
try:
|
||||||
|
api_input = {
|
||||||
|
'name': args['name'],
|
||||||
|
'language': args['language'],
|
||||||
|
'user_context': args['user_context'],
|
||||||
|
'valid_from': args['valid_from']
|
||||||
|
}
|
||||||
|
|
||||||
|
new_doc, new_doc_vers = prepare_youtube_document(args['url'], tenant_id, api_input)
|
||||||
|
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'message': f'Processing on YouTube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
|
||||||
|
'document_id': new_doc.id,
|
||||||
|
'document_version_id': new_doc_vers.id,
|
||||||
|
'task_id': task_id
|
||||||
|
}, 201
|
||||||
|
|
||||||
|
except EveAIYoutubeError as e:
|
||||||
|
return {'message': str(e)}, 400
|
||||||
|
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
|
||||||
|
return {'message': str(e)}, 400
|
||||||
|
except Exception as e:
|
||||||
|
current_app.logger.error(f'Error adding YouTube document: {str(e)}')
|
||||||
|
return {'message': 'Error adding YouTube document'}, 500
|
||||||
|
|
||||||
|
|
||||||
|
# You can add more API resources here as needed
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
from flask import request
|
|
||||||
from flask.views import MethodView
|
|
||||||
|
|
||||||
class RegisterAPI(MethodView):
|
|
||||||
def post(self):
|
|
||||||
username = request.json['username']
|
|
||||||
|
|
||||||
@@ -17,6 +17,7 @@ from .errors import register_error_handlers
|
|||||||
from common.utils.celery_utils import make_celery, init_celery
|
from common.utils.celery_utils import make_celery, init_celery
|
||||||
from common.utils.template_filters import register_filters
|
from common.utils.template_filters import register_filters
|
||||||
from config.config import get_config
|
from config.config import get_config
|
||||||
|
from eveai_app.views.security_forms import ResetPasswordForm
|
||||||
|
|
||||||
|
|
||||||
def create_app(config_file=None):
|
def create_app(config_file=None):
|
||||||
@@ -26,7 +27,6 @@ def create_app(config_file=None):
|
|||||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1)
|
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1)
|
||||||
|
|
||||||
environment = os.getenv('FLASK_ENV', 'development')
|
environment = os.getenv('FLASK_ENV', 'development')
|
||||||
print(environment)
|
|
||||||
|
|
||||||
match environment:
|
match environment:
|
||||||
case 'development':
|
case 'development':
|
||||||
@@ -37,6 +37,7 @@ def create_app(config_file=None):
|
|||||||
app.config.from_object(get_config('dev'))
|
app.config.from_object(get_config('dev'))
|
||||||
|
|
||||||
app.config['SESSION_KEY_PREFIX'] = 'eveai_app_'
|
app.config['SESSION_KEY_PREFIX'] = 'eveai_app_'
|
||||||
|
app.config['SECURITY_RESET_PASSWORD_FORM'] = ResetPasswordForm
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.makedirs(app.instance_path)
|
os.makedirs(app.instance_path)
|
||||||
@@ -47,8 +48,6 @@ def create_app(config_file=None):
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
logger.info("eveai_app starting up")
|
logger.info("eveai_app starting up")
|
||||||
logger.debug("start config")
|
|
||||||
logger.debug(app.config)
|
|
||||||
|
|
||||||
# Register extensions
|
# Register extensions
|
||||||
|
|
||||||
@@ -93,14 +92,11 @@ def create_app(config_file=None):
|
|||||||
}
|
}
|
||||||
return jsonify(response), 500
|
return jsonify(response), 500
|
||||||
|
|
||||||
@app.before_request
|
# @app.before_request
|
||||||
def before_request():
|
# def before_request():
|
||||||
# app.logger.debug(f"Before request - Session ID: {session.sid}")
|
# # app.logger.debug(f"Before request - Session ID: {session.sid}")
|
||||||
app.logger.debug(f"Before request - Session data: {session}")
|
# app.logger.debug(f"Before request - Session data: {session}")
|
||||||
app.logger.debug(f"Before request - Request headers: {request.headers}")
|
# app.logger.debug(f"Before request - Request headers: {request.headers}")
|
||||||
|
|
||||||
# Register API
|
|
||||||
register_api(app)
|
|
||||||
|
|
||||||
# Register template filters
|
# Register template filters
|
||||||
register_filters(app)
|
register_filters(app)
|
||||||
@@ -136,9 +132,3 @@ def register_blueprints(app):
|
|||||||
app.register_blueprint(security_bp)
|
app.register_blueprint(security_bp)
|
||||||
from .views.interaction_views import interaction_bp
|
from .views.interaction_views import interaction_bp
|
||||||
app.register_blueprint(interaction_bp)
|
app.register_blueprint(interaction_bp)
|
||||||
|
|
||||||
|
|
||||||
def register_api(app):
|
|
||||||
pass
|
|
||||||
# from . import api
|
|
||||||
# app.register_blueprint(api.bp, url_prefix='/api')
|
|
||||||
|
|||||||
@@ -1,126 +1,80 @@
|
|||||||
{% extends "base.html" %}
|
{% extends "base.html" %}
|
||||||
|
{% from "macros.html" import render_field %}
|
||||||
|
|
||||||
|
{% block title %}Session Overview{% endblock %}
|
||||||
|
|
||||||
|
{% block content_title %}Session Overview{% endblock %}
|
||||||
|
{% block content_description %}An overview of the chat session.{% endblock %}
|
||||||
|
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="container mt-5">
|
<div class="container mt-5">
|
||||||
<h2>Chat Session Details</h2>
|
<h2>Chat Session Details</h2>
|
||||||
<!-- Session Information -->
|
|
||||||
<div class="card mb-4">
|
<div class="card mb-4">
|
||||||
<div class="card-header">
|
<div class="card-header">
|
||||||
<h5>Session Information</h5>
|
<h5>Session Information</h5>
|
||||||
<!-- Timezone Toggle Buttons -->
|
|
||||||
<div class="btn-group" role="group">
|
|
||||||
<button type="button" class="btn btn-primary" id="toggle-interaction-timezone">Interaction Timezone</button>
|
|
||||||
<button type="button" class="btn btn-secondary" id="toggle-admin-timezone">Admin Timezone</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<dl class="row">
|
<p><strong>Session ID:</strong> {{ chat_session.session_id }}</p>
|
||||||
<dt class="col-sm-3">Session ID:</dt>
|
<p><strong>User:</strong> {{ chat_session.user.user_name if chat_session.user else 'Anonymous' }}</p>
|
||||||
<dd class="col-sm-9">{{ chat_session.session_id }}</dd>
|
<p><strong>Start:</strong> {{ chat_session.session_start | to_local_time(chat_session.timezone) }}</p>
|
||||||
|
<p><strong>End:</strong> {{ chat_session.session_end | to_local_time(chat_session.timezone) if chat_session.session_end else 'Ongoing' }}</p>
|
||||||
<dt class="col-sm-3">Session Start:</dt>
|
|
||||||
<dd class="col-sm-9">
|
|
||||||
<span class="timezone interaction-timezone">{{ chat_session.session_start | to_local_time(chat_session.timezone) }}</span>
|
|
||||||
<span class="timezone admin-timezone d-none">{{ chat_session.session_start | to_local_time(session['admin_user_timezone']) }}</span>
|
|
||||||
</dd>
|
|
||||||
|
|
||||||
<dt class="col-sm-3">Session End:</dt>
|
|
||||||
<dd class="col-sm-9">
|
|
||||||
{% if chat_session.session_end %}
|
|
||||||
<span class="timezone interaction-timezone">{{ chat_session.session_end | to_local_time(chat_session.timezone) }}</span>
|
|
||||||
<span class="timezone admin-timezone d-none">{{ chat_session.session_end | to_local_time(session['admin_user_timezone']) }}</span>
|
|
||||||
{% else %}
|
|
||||||
Ongoing
|
|
||||||
{% endif %}
|
|
||||||
</dd>
|
|
||||||
</dl>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Interactions List -->
|
<h3>Interactions</h3>
|
||||||
<div class="card mb-4">
|
<div class="accordion" id="interactionsAccordion">
|
||||||
<div class="card-header">
|
{% for interaction in interactions %}
|
||||||
<h5>Interactions</h5>
|
<div class="accordion-item">
|
||||||
</div>
|
<h2 class="accordion-header" id="heading{{ loop.index }}">
|
||||||
<div class="card-body">
|
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse"
|
||||||
{% for interaction in interactions %}
|
data-bs-target="#collapse{{ loop.index }}" aria-expanded="false"
|
||||||
<div class="interaction mb-3">
|
aria-controls="collapse{{ loop.index }}">
|
||||||
<div class="card">
|
<div class="d-flex justify-content-between align-items-center w-100">
|
||||||
<div class="card-header d-flex justify-content-between">
|
<span class="interaction-question">{{ interaction.question | truncate(50) }}</span>
|
||||||
<span>Question:</span>
|
<span class="interaction-icons">
|
||||||
<span class="text-muted">
|
<i class="material-icons algorithm-icon {{ interaction.algorithm_used | lower }}">fingerprint</i>
|
||||||
<span class="timezone interaction-timezone">{{ interaction.question_at | to_local_time(interaction.timezone) }}</span>
|
<i class="material-icons thumb-icon {% if interaction.appreciation == 100 %}filled{% else %}outlined{% endif %}">thumb_up</i>
|
||||||
<span class="timezone admin-timezone d-none">{{ interaction.question_at | to_local_time(session['admin_user_timezone']) }}</span>
|
<i class="material-icons thumb-icon {% if interaction.appreciation == 0 %}filled{% else %}outlined{% endif %}">thumb_down</i>
|
||||||
-
|
</span>
|
||||||
<span class="timezone interaction-timezone">{{ interaction.answer_at | to_local_time(interaction.timezone) }}</span>
|
|
||||||
<span class="timezone admin-timezone d-none">{{ interaction.answer_at | to_local_time(session['admin_user_timezone']) }}</span>
|
|
||||||
({{ interaction.question_at | time_difference(interaction.answer_at) }})
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
<div class="card-body">
|
|
||||||
<p><strong>Question:</strong> {{ interaction.question }}</p>
|
|
||||||
<p><strong>Answer:</strong> {{ interaction.answer }}</p>
|
|
||||||
<p>
|
|
||||||
<strong>Algorithm Used:</strong>
|
|
||||||
<i class="material-icons {{ 'fingerprint-rag-' ~ interaction.algorithm_used.lower() }}">
|
|
||||||
fingerprint
|
|
||||||
</i> {{ interaction.algorithm_used }}
|
|
||||||
</p>
|
|
||||||
<p>
|
|
||||||
<strong>Appreciation:</strong>
|
|
||||||
<i class="material-icons thumb-icon {{ 'thumb_up' if interaction.appreciation == 1 else 'thumb_down' }}">
|
|
||||||
{{ 'thumb_up' if interaction.appreciation == 1 else 'thumb_down' }}
|
|
||||||
</i>
|
|
||||||
</p>
|
|
||||||
<p><strong>Embeddings:</strong>
|
|
||||||
{% if interaction.embeddings %}
|
|
||||||
{% for embedding in interaction.embeddings %}
|
|
||||||
<a href="{{ url_for('interaction_bp.view_embedding', embedding_id=embedding.embedding_id) }}" class="badge badge-info">
|
|
||||||
{{ embedding.embedding_id }}
|
|
||||||
</a>
|
|
||||||
{% endfor %}
|
|
||||||
{% else %}
|
|
||||||
None
|
|
||||||
{% endif %}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
|
</button>
|
||||||
|
</h2>
|
||||||
|
<div id="collapse{{ loop.index }}" class="accordion-collapse collapse" aria-labelledby="heading{{ loop.index }}"
|
||||||
|
data-bs-parent="#interactionsAccordion">
|
||||||
|
<div class="accordion-body">
|
||||||
|
<h6>Detailed Question:</h6>
|
||||||
|
<p>{{ interaction.detailed_question }}</p>
|
||||||
|
<h6>Answer:</h6>
|
||||||
|
<div class="markdown-content">{{ interaction.answer | safe }}</div>
|
||||||
|
{% if embeddings_dict.get(interaction.id) %}
|
||||||
|
<h6>Related Documents:</h6>
|
||||||
|
<ul>
|
||||||
|
{% for embedding in embeddings_dict[interaction.id] %}
|
||||||
|
<li>
|
||||||
|
{% if embedding.url %}
|
||||||
|
<a href="{{ embedding.url }}" target="_blank">{{ embedding.url }}</a>
|
||||||
|
{% else %}
|
||||||
|
{{ embedding.file_name }}
|
||||||
|
{% endif %}
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
{% block scripts %}
|
{% block scripts %}
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||||
<script>
|
<script>
|
||||||
document.addEventListener('DOMContentLoaded', function() {
|
document.addEventListener('DOMContentLoaded', function() {
|
||||||
// Elements to toggle
|
var markdownElements = document.querySelectorAll('.markdown-content');
|
||||||
const interactionTimes = document.querySelectorAll('.interaction-timezone');
|
markdownElements.forEach(function(el) {
|
||||||
const adminTimes = document.querySelectorAll('.admin-timezone');
|
el.innerHTML = marked.parse(el.textContent);
|
||||||
|
|
||||||
// Buttons
|
|
||||||
const interactionButton = document.getElementById('toggle-interaction-timezone');
|
|
||||||
const adminButton = document.getElementById('toggle-admin-timezone');
|
|
||||||
|
|
||||||
// Toggle to Interaction Timezone
|
|
||||||
interactionButton.addEventListener('click', function() {
|
|
||||||
interactionTimes.forEach(el => el.classList.remove('d-none'));
|
|
||||||
adminTimes.forEach(el => el.classList.add('d-none'));
|
|
||||||
interactionButton.classList.add('btn-primary');
|
|
||||||
interactionButton.classList.remove('btn-secondary');
|
|
||||||
adminButton.classList.add('btn-secondary');
|
|
||||||
adminButton.classList.remove('btn-primary');
|
|
||||||
});
|
|
||||||
|
|
||||||
// Toggle to Admin Timezone
|
|
||||||
adminButton.addEventListener('click', function() {
|
|
||||||
interactionTimes.forEach(el => el.classList.add('d-none'));
|
|
||||||
adminTimes.forEach(el => el.classList.remove('d-none'));
|
|
||||||
interactionButton.classList.add('btn-secondary');
|
|
||||||
interactionButton.classList.remove('btn-primary');
|
|
||||||
adminButton.classList.add('btn-primary');
|
|
||||||
adminButton.classList.remove('btn-secondary');
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -84,7 +84,6 @@
|
|||||||
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
{'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
{'name': 'Add Youtube Document' , 'url': '/document/add_youtube', 'roles': ['Super User', 'Tenant Admin']},
|
|
||||||
{'name': 'All Documents', 'url': '/document/documents', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'All Documents', 'url': '/document/documents', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
{'name': 'All Document Versions', 'url': '/document/document_versions_list', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'All Document Versions', 'url': '/document/document_versions_list', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
{'name': 'Library Operations', 'url': '/document/library_operations', 'roles': ['Super User', 'Tenant Admin']},
|
{'name': 'Library Operations', 'url': '/document/library_operations', 'roles': ['Super User', 'Tenant Admin']},
|
||||||
|
|||||||
@@ -62,12 +62,19 @@
|
|||||||
{{ render_included_field(field, disabled_fields=license_fields, include_fields=license_fields) }}
|
{{ render_included_field(field, disabled_fields=license_fields, include_fields=license_fields) }}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
<!-- Register API Key Button -->
|
<!-- Register API Key Button -->
|
||||||
|
<button type="button" class="btn btn-primary" onclick="generateNewChatApiKey()">Register Chat API Key</button>
|
||||||
<button type="button" class="btn btn-primary" onclick="generateNewApiKey()">Register API Key</button>
|
<button type="button" class="btn btn-primary" onclick="generateNewApiKey()">Register API Key</button>
|
||||||
<!-- API Key Display Field -->
|
<!-- API Key Display Field -->
|
||||||
|
<div id="chat-api-key-field" style="display:none;">
|
||||||
|
<label for="chat-api-key">Chat API Key:</label>
|
||||||
|
<input type="text" id="chat-api-key" class="form-control" readonly>
|
||||||
|
<button type="button" id="copy-chat-button" class="btn btn-primary">Copy to Clipboard</button>
|
||||||
|
<p id="copy-chat-message" style="display:none;color:green;">Chat API key copied to clipboard</p>
|
||||||
|
</div>
|
||||||
<div id="api-key-field" style="display:none;">
|
<div id="api-key-field" style="display:none;">
|
||||||
<label for="api-key">API Key:</label>
|
<label for="api-key">API Key:</label>
|
||||||
<input type="text" id="api-key" class="form-control" readonly>
|
<input type="text" id="api-key" class="form-control" readonly>
|
||||||
<button type="button" id="copy-button" class="btn btn-primary">Copy to Clipboard</button>
|
<button type="button" id="copy-api-button" class="btn btn-primary">Copy to Clipboard</button>
|
||||||
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
|
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -105,14 +112,25 @@
|
|||||||
|
|
||||||
{% block scripts %}
|
{% block scripts %}
|
||||||
<script>
|
<script>
|
||||||
|
// Function to generate a new Chat API Key
|
||||||
|
function generateNewChatApiKey() {
|
||||||
|
generateApiKey('/admin/user/generate_chat_api_key', '#chat-api-key', '#chat-api-key-field');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Function to generate a new general API Key
|
||||||
function generateNewApiKey() {
|
function generateNewApiKey() {
|
||||||
|
generateApiKey('/admin/user/generate_api_api_key', '#api-key', '#api-key-field');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reusable function to handle API key generation
|
||||||
|
function generateApiKey(url, inputSelector, fieldSelector) {
|
||||||
$.ajax({
|
$.ajax({
|
||||||
url: '/user/generate_chat_api_key',
|
url: url,
|
||||||
type: 'POST',
|
type: 'POST',
|
||||||
contentType: 'application/json',
|
contentType: 'application/json',
|
||||||
success: function(response) {
|
success: function(response) {
|
||||||
$('#api-key').val(response.api_key);
|
$(inputSelector).val(response.api_key);
|
||||||
$('#api-key-field').show();
|
$(fieldSelector).show();
|
||||||
},
|
},
|
||||||
error: function(error) {
|
error: function(error) {
|
||||||
alert('Error generating new API key: ' + error.responseText);
|
alert('Error generating new API key: ' + error.responseText);
|
||||||
@@ -120,25 +138,27 @@
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function copyToClipboard(selector) {
|
// Function to copy text to clipboard
|
||||||
|
function copyToClipboard(selector, messageSelector) {
|
||||||
const element = document.querySelector(selector);
|
const element = document.querySelector(selector);
|
||||||
if (element) {
|
if (element) {
|
||||||
const text = element.value;
|
const text = element.value;
|
||||||
if (navigator.clipboard && navigator.clipboard.writeText) {
|
if (navigator.clipboard && navigator.clipboard.writeText) {
|
||||||
navigator.clipboard.writeText(text).then(function() {
|
navigator.clipboard.writeText(text).then(function() {
|
||||||
showCopyMessage();
|
showCopyMessage(messageSelector);
|
||||||
}).catch(function(error) {
|
}).catch(function(error) {
|
||||||
alert('Failed to copy text: ' + error);
|
alert('Failed to copy text: ' + error);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
fallbackCopyToClipboard(text);
|
fallbackCopyToClipboard(text, messageSelector);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
console.error('Element not found for selector:', selector);
|
console.error('Element not found for selector:', selector);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function fallbackCopyToClipboard(text) {
|
// Fallback method for copying text to clipboard
|
||||||
|
function fallbackCopyToClipboard(text, messageSelector) {
|
||||||
const textArea = document.createElement('textarea');
|
const textArea = document.createElement('textarea');
|
||||||
textArea.value = text;
|
textArea.value = text;
|
||||||
document.body.appendChild(textArea);
|
document.body.appendChild(textArea);
|
||||||
@@ -146,15 +166,16 @@
|
|||||||
textArea.select();
|
textArea.select();
|
||||||
try {
|
try {
|
||||||
document.execCommand('copy');
|
document.execCommand('copy');
|
||||||
showCopyMessage();
|
showCopyMessage(messageSelector);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
alert('Fallback: Oops, unable to copy', err);
|
alert('Fallback: Oops, unable to copy', err);
|
||||||
}
|
}
|
||||||
document.body.removeChild(textArea);
|
document.body.removeChild(textArea);
|
||||||
}
|
}
|
||||||
|
|
||||||
function showCopyMessage() {
|
// Function to show copy confirmation message
|
||||||
const message = document.getElementById('copy-message');
|
function showCopyMessage(messageSelector) {
|
||||||
|
const message = document.querySelector(messageSelector);
|
||||||
if (message) {
|
if (message) {
|
||||||
message.style.display = 'block';
|
message.style.display = 'block';
|
||||||
setTimeout(function() {
|
setTimeout(function() {
|
||||||
@@ -163,8 +184,13 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
document.getElementById('copy-button').addEventListener('click', function() {
|
// Event listeners for copy buttons
|
||||||
copyToClipboard('#api-key');
|
document.getElementById('copy-chat-button').addEventListener('click', function() {
|
||||||
|
copyToClipboard('#chat-api-key', '#copy-chat-message');
|
||||||
|
});
|
||||||
|
|
||||||
|
document.getElementById('copy-api-button').addEventListener('click', function() {
|
||||||
|
copyToClipboard('#api-key', '#copy-message');
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
<script>
|
<script>
|
||||||
|
|||||||
@@ -1,14 +1,21 @@
|
|||||||
from flask import session
|
from flask import session, current_app
|
||||||
from flask_wtf import FlaskForm
|
from flask_wtf import FlaskForm
|
||||||
from wtforms import (StringField, BooleanField, SubmitField, DateField,
|
from wtforms import (StringField, BooleanField, SubmitField, DateField,
|
||||||
SelectField, FieldList, FormField, TextAreaField, URLField)
|
SelectField, FieldList, FormField, TextAreaField, URLField)
|
||||||
from wtforms.validators import DataRequired, Length, Optional, URL
|
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError
|
||||||
from flask_wtf.file import FileField, FileAllowed, FileRequired
|
from flask_wtf.file import FileField, FileAllowed, FileRequired
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_file(form, field):
|
||||||
|
if field.data:
|
||||||
|
filename = field.data.filename
|
||||||
|
allowed_extensions = current_app.config.get('SUPPORTED_FILE_TYPES', [])
|
||||||
|
if not ('.' in filename and filename.rsplit('.', 1)[1].lower() in allowed_extensions):
|
||||||
|
raise ValidationError('Unsupported file type.')
|
||||||
|
|
||||||
|
|
||||||
class AddDocumentForm(FlaskForm):
|
class AddDocumentForm(FlaskForm):
|
||||||
file = FileField('File', validators=[FileAllowed(['pdf', 'txt', 'html']),
|
file = FileField('File', validators=[FileRequired(), allowed_file])
|
||||||
FileRequired()])
|
|
||||||
name = StringField('Name', validators=[Length(max=100)])
|
name = StringField('Name', validators=[Length(max=100)])
|
||||||
language = SelectField('Language', choices=[], validators=[Optional()])
|
language = SelectField('Language', choices=[], validators=[Optional()])
|
||||||
user_context = TextAreaField('User Context', validators=[Optional()])
|
user_context = TextAreaField('User Context', validators=[Optional()])
|
||||||
@@ -20,6 +27,7 @@ class AddDocumentForm(FlaskForm):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.language.choices = [(language, language) for language in
|
self.language.choices = [(language, language) for language in
|
||||||
session.get('tenant').get('allowed_languages')]
|
session.get('tenant').get('allowed_languages')]
|
||||||
|
self.language.data = session.get('tenant').get('default_language')
|
||||||
|
|
||||||
|
|
||||||
class AddURLForm(FlaskForm):
|
class AddURLForm(FlaskForm):
|
||||||
@@ -35,6 +43,7 @@ class AddURLForm(FlaskForm):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.language.choices = [(language, language) for language in
|
self.language.choices = [(language, language) for language in
|
||||||
session.get('tenant').get('allowed_languages')]
|
session.get('tenant').get('allowed_languages')]
|
||||||
|
self.language.data = session.get('tenant').get('default_language')
|
||||||
|
|
||||||
|
|
||||||
class AddURLsForm(FlaskForm):
|
class AddURLsForm(FlaskForm):
|
||||||
@@ -50,6 +59,7 @@ class AddURLsForm(FlaskForm):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.language.choices = [(language, language) for language in
|
self.language.choices = [(language, language) for language in
|
||||||
session.get('tenant').get('allowed_languages')]
|
session.get('tenant').get('allowed_languages')]
|
||||||
|
self.language.data = session.get('tenant').get('default_language')
|
||||||
|
|
||||||
|
|
||||||
class AddYoutubeForm(FlaskForm):
|
class AddYoutubeForm(FlaskForm):
|
||||||
|
|||||||
@@ -12,14 +12,17 @@ from werkzeug.utils import secure_filename
|
|||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
import requests
|
import requests
|
||||||
from requests.exceptions import SSLError
|
from requests.exceptions import SSLError
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse, unquote
|
||||||
import io
|
import io
|
||||||
from minio.error import S3Error
|
from minio.error import S3Error
|
||||||
|
|
||||||
from common.models.document import Document, DocumentVersion
|
from common.models.document import Document, DocumentVersion
|
||||||
from common.extensions import db, minio_client
|
from common.extensions import db, minio_client
|
||||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm, \
|
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
|
||||||
AddURLsForm
|
process_multiple_urls, prepare_youtube_document, create_version_for_document, upload_file_for_version
|
||||||
|
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
|
||||||
|
EveAIDoubleURLException, EveAIYoutubeError
|
||||||
|
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm
|
||||||
from common.utils.middleware import mw_before_request
|
from common.utils.middleware import mw_before_request
|
||||||
from common.utils.celery_utils import current_celery
|
from common.utils.celery_utils import current_celery
|
||||||
from common.utils.nginx_utils import prefixed_url_for
|
from common.utils.nginx_utils import prefixed_url_for
|
||||||
@@ -56,30 +59,37 @@ def before_request():
|
|||||||
@roles_accepted('Super User', 'Tenant Admin')
|
@roles_accepted('Super User', 'Tenant Admin')
|
||||||
def add_document():
|
def add_document():
|
||||||
form = AddDocumentForm()
|
form = AddDocumentForm()
|
||||||
|
current_app.logger.debug('Adding document')
|
||||||
|
|
||||||
# If the form is submitted
|
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
current_app.logger.info(f'Adding document for tenant {session["tenant"]["id"]}')
|
try:
|
||||||
file = form.file.data
|
current_app.logger.debug('Validating file type')
|
||||||
filename = secure_filename(file.filename)
|
tenant_id = session['tenant']['id']
|
||||||
extension = filename.rsplit('.', 1)[1].lower()
|
file = form.file.data
|
||||||
form_dict = form_to_dict(form)
|
filename = secure_filename(file.filename)
|
||||||
|
extension = filename.rsplit('.', 1)[1].lower()
|
||||||
|
|
||||||
new_doc, new_doc_vers = create_document_stack(form_dict, file, filename, extension)
|
validate_file_type(extension)
|
||||||
|
|
||||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
api_input = {
|
||||||
session['tenant']['id'],
|
'name': form.name.data,
|
||||||
new_doc_vers.id,
|
'language': form.language.data,
|
||||||
])
|
'user_context': form.user_context.data,
|
||||||
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
|
'valid_from': form.valid_from.data
|
||||||
f'Document Version {new_doc_vers.id}. '
|
}
|
||||||
f'Embedding creation task: {task.id}')
|
|
||||||
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
|
|
||||||
'success')
|
|
||||||
|
|
||||||
return redirect(prefixed_url_for('document_bp.documents'))
|
new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
|
||||||
else:
|
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
|
||||||
form_validation_failed(request, form)
|
|
||||||
|
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
|
||||||
|
'success')
|
||||||
|
return redirect(prefixed_url_for('document_bp.documents'))
|
||||||
|
|
||||||
|
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
|
||||||
|
flash(str(e), 'error')
|
||||||
|
except Exception as e:
|
||||||
|
current_app.logger.error(f'Error adding document: {str(e)}')
|
||||||
|
flash('An error occurred while adding the document.', 'error')
|
||||||
|
|
||||||
return render_template('document/add_document.html', form=form)
|
return render_template('document/add_document.html', form=form)
|
||||||
|
|
||||||
@@ -89,45 +99,35 @@ def add_document():
|
|||||||
def add_url():
|
def add_url():
|
||||||
form = AddURLForm()
|
form = AddURLForm()
|
||||||
|
|
||||||
# If the form is submitted
|
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
current_app.logger.info(f'Adding url for tenant {session["tenant"]["id"]}')
|
try:
|
||||||
url = form.url.data
|
tenant_id = session['tenant']['id']
|
||||||
|
url = form.url.data
|
||||||
|
|
||||||
doc_vers = DocumentVersion.query.filter_by(url=url).all()
|
file_content, filename, extension = process_url(url, tenant_id)
|
||||||
if doc_vers:
|
|
||||||
current_app.logger.info(f'A document with url {url} already exists. No new document created.')
|
api_input = {
|
||||||
flash(f'A document with url {url} already exists. No new document created.', 'info')
|
'name': form.name.data or filename,
|
||||||
|
'url': url,
|
||||||
|
'language': form.language.data,
|
||||||
|
'user_context': form.user_context.data,
|
||||||
|
'valid_from': form.valid_from.data
|
||||||
|
}
|
||||||
|
|
||||||
|
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
|
||||||
|
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
|
||||||
|
|
||||||
|
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
|
||||||
|
'success')
|
||||||
return redirect(prefixed_url_for('document_bp.documents'))
|
return redirect(prefixed_url_for('document_bp.documents'))
|
||||||
# Only when no document with URL exists
|
|
||||||
html = fetch_html(url)
|
|
||||||
file = io.BytesIO(html)
|
|
||||||
|
|
||||||
parsed_url = urlparse(url)
|
except EveAIDoubleURLException:
|
||||||
path_parts = parsed_url.path.split('/')
|
flash(f'A document with url {url} already exists. No new document created.', 'info')
|
||||||
filename = path_parts[-1]
|
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
|
||||||
if filename == '':
|
flash(str(e), 'error')
|
||||||
filename = 'index'
|
except Exception as e:
|
||||||
if not filename.endswith('.html'):
|
current_app.logger.error(f'Error adding document: {str(e)}')
|
||||||
filename += '.html'
|
flash('An error occurred while adding the document.', 'error')
|
||||||
extension = 'html'
|
|
||||||
form_dict = form_to_dict(form)
|
|
||||||
|
|
||||||
new_doc, new_doc_vers = create_document_stack(form_dict, file, filename, extension)
|
|
||||||
|
|
||||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
|
||||||
session['tenant']['id'],
|
|
||||||
new_doc_vers.id,
|
|
||||||
])
|
|
||||||
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
|
|
||||||
f'Document Version {new_doc_vers.id}. '
|
|
||||||
f'Embedding creation task: {task.id}')
|
|
||||||
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
|
|
||||||
'success')
|
|
||||||
|
|
||||||
return redirect(prefixed_url_for('document_bp.documents'))
|
|
||||||
else:
|
|
||||||
form_validation_failed(request, form)
|
|
||||||
|
|
||||||
return render_template('document/add_url.html', form=form)
|
return render_template('document/add_url.html', form=form)
|
||||||
|
|
||||||
@@ -138,100 +138,36 @@ def add_urls():
|
|||||||
form = AddURLsForm()
|
form = AddURLsForm()
|
||||||
|
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
urls = form.urls.data.split('\n')
|
try:
|
||||||
urls = [url.strip() for url in urls if url.strip()]
|
tenant_id = session['tenant']['id']
|
||||||
|
urls = form.urls.data.split('\n')
|
||||||
|
urls = [url.strip() for url in urls if url.strip()]
|
||||||
|
|
||||||
for i, url in enumerate(urls):
|
api_input = {
|
||||||
try:
|
'name': form.name.data,
|
||||||
doc_vers = DocumentVersion.query.filter_by(url=url).all()
|
'language': form.language.data,
|
||||||
if doc_vers:
|
'user_context': form.user_context.data,
|
||||||
current_app.logger.info(f'A document with url {url} already exists. No new document created.')
|
'valid_from': form.valid_from.data
|
||||||
flash(f'A document with url {url} already exists. No new document created.', 'info')
|
}
|
||||||
continue
|
|
||||||
|
|
||||||
html = fetch_html(url)
|
results = process_multiple_urls(urls, tenant_id, api_input)
|
||||||
file = io.BytesIO(html)
|
|
||||||
|
|
||||||
parsed_url = urlparse(url)
|
for result in results:
|
||||||
path_parts = parsed_url.path.split('/')
|
if result['status'] == 'success':
|
||||||
filename = path_parts[-1] if path_parts[-1] else 'index'
|
flash(
|
||||||
if not filename.endswith('.html'):
|
f"Processed URL: {result['url']} - Document ID: {result['document_id']}, Version ID: {result['document_version_id']}",
|
||||||
filename += '.html'
|
'success')
|
||||||
|
else:
|
||||||
|
flash(f"Error processing URL: {result['url']} - {result['message']}", 'error')
|
||||||
|
|
||||||
# Use the name prefix if provided, otherwise use the filename
|
return redirect(prefixed_url_for('document_bp.documents'))
|
||||||
doc_name = f"{form.name.data}-{filename}" if form.name.data else filename
|
|
||||||
|
|
||||||
new_doc, new_doc_vers = create_document_stack({
|
except Exception as e:
|
||||||
'name': doc_name,
|
current_app.logger.error(f'Error adding multiple URLs: {str(e)}')
|
||||||
'url': url,
|
flash('An error occurred while adding the URLs.', 'error')
|
||||||
'language': form.language.data,
|
|
||||||
'user_context': form.user_context.data,
|
|
||||||
'valid_from': form.valid_from.data
|
|
||||||
}, file, filename, 'html')
|
|
||||||
|
|
||||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
|
||||||
session['tenant']['id'],
|
|
||||||
new_doc_vers.id,
|
|
||||||
])
|
|
||||||
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
|
|
||||||
f'Document Version {new_doc_vers.id}. '
|
|
||||||
f'Embedding creation task: {task.id}')
|
|
||||||
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
|
|
||||||
'success')
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
current_app.logger.error(f"Error processing URL {url}: {str(e)}")
|
|
||||||
flash(f'Error processing URL {url}: {str(e)}', 'danger')
|
|
||||||
|
|
||||||
return redirect(prefixed_url_for('document_bp.documents'))
|
|
||||||
else:
|
|
||||||
form_validation_failed(request, form)
|
|
||||||
|
|
||||||
return render_template('document/add_urls.html', form=form)
|
return render_template('document/add_urls.html', form=form)
|
||||||
|
|
||||||
@document_bp.route('/add_youtube', methods=['GET', 'POST'])
|
|
||||||
@roles_accepted('Super User', 'Tenant Admin')
|
|
||||||
def add_youtube():
|
|
||||||
form = AddYoutubeForm()
|
|
||||||
|
|
||||||
if form.validate_on_submit():
|
|
||||||
current_app.logger.info(f'Adding Youtube document for tenant {session["tenant"]["id"]}')
|
|
||||||
url = form.url.data
|
|
||||||
current_app.logger.debug(f'Value of language field: {form.language.data}')
|
|
||||||
|
|
||||||
doc_vers = DocumentVersion.query.filter_by(url=url).all()
|
|
||||||
if doc_vers:
|
|
||||||
current_app.logger.info(f'A document with url {url} already exists. No new document created.')
|
|
||||||
flash(f'A document with url {url} already exists. No new document created.', 'info')
|
|
||||||
return redirect(prefixed_url_for('document_bp.documents'))
|
|
||||||
# As downloading a Youtube document can take quite some time, we offload this downloading to the worker
|
|
||||||
# We just pass a simple file to get things conform
|
|
||||||
file = "Youtube placeholder file"
|
|
||||||
|
|
||||||
filename = 'placeholder.youtube'
|
|
||||||
extension = 'youtube'
|
|
||||||
form_dict = form_to_dict(form)
|
|
||||||
current_app.logger.debug(f'Form data: {form_dict}')
|
|
||||||
|
|
||||||
new_doc, new_doc_vers = create_document_stack(form_dict, file, filename, extension)
|
|
||||||
|
|
||||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
|
||||||
session['tenant']['id'],
|
|
||||||
new_doc_vers.id,
|
|
||||||
])
|
|
||||||
current_app.logger.info(f'Processing and Embedding on Youtube document started for tenant '
|
|
||||||
f'{session["tenant"]["id"]}, '
|
|
||||||
f'Document Version {new_doc_vers.id}. '
|
|
||||||
f'Processing and Embedding Youtube task: {task.id}')
|
|
||||||
flash(f'Processing on Youtube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
|
|
||||||
'success')
|
|
||||||
|
|
||||||
return redirect(prefixed_url_for('document_bp.documents'))
|
|
||||||
else:
|
|
||||||
form_validation_failed(request, form)
|
|
||||||
|
|
||||||
return render_template('document/add_youtube.html', form=form)
|
|
||||||
|
|
||||||
|
|
||||||
@document_bp.route('/documents', methods=['GET', 'POST'])
|
@document_bp.route('/documents', methods=['GET', 'POST'])
|
||||||
@roles_accepted('Super User', 'Tenant Admin')
|
@roles_accepted('Super User', 'Tenant Admin')
|
||||||
@@ -358,6 +294,8 @@ def handle_document_version_selection():
|
|||||||
|
|
||||||
action = request.form['action']
|
action = request.form['action']
|
||||||
|
|
||||||
|
current_app.logger.debug(f'Triggered Document Version Action: {action}')
|
||||||
|
|
||||||
match action:
|
match action:
|
||||||
case 'edit_document_version':
|
case 'edit_document_version':
|
||||||
return redirect(prefixed_url_for('document_bp.edit_document_version', document_version_id=doc_vers_id))
|
return redirect(prefixed_url_for('document_bp.edit_document_version', document_version_id=doc_vers_id))
|
||||||
@@ -441,7 +379,7 @@ def refresh_document(doc_id):
|
|||||||
current_app.logger.info(f'Document added successfully for tenant {session["tenant"]["id"]}, '
|
current_app.logger.info(f'Document added successfully for tenant {session["tenant"]["id"]}, '
|
||||||
f'Document Version {new_doc_vers.id}')
|
f'Document Version {new_doc_vers.id}')
|
||||||
|
|
||||||
upload_file_for_version(new_doc_vers, file, extension)
|
upload_file_for_version(new_doc_vers, file, extension, session["tenant"]["id"])
|
||||||
|
|
||||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
||||||
session['tenant']['id'],
|
session['tenant']['id'],
|
||||||
@@ -489,116 +427,11 @@ def update_logging_information(obj, timestamp):
|
|||||||
obj.updated_by = current_user.id
|
obj.updated_by = current_user.id
|
||||||
|
|
||||||
|
|
||||||
def create_document_stack(form, file, filename, extension):
|
|
||||||
# Create the Document
|
|
||||||
new_doc = create_document(form, filename)
|
|
||||||
|
|
||||||
# Create the DocumentVersion
|
|
||||||
new_doc_vers = create_version_for_document(new_doc,
|
|
||||||
form.get('url', ''),
|
|
||||||
form.get('language', 'en'),
|
|
||||||
form.get('user_context', '')
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
db.session.add(new_doc)
|
|
||||||
db.session.add(new_doc_vers)
|
|
||||||
db.session.commit()
|
|
||||||
except SQLAlchemyError as e:
|
|
||||||
current_app.logger.error(f'Error adding document for tenant {session["tenant"]["id"]}: {e}')
|
|
||||||
flash('Error adding document.', 'alert')
|
|
||||||
db.session.rollback()
|
|
||||||
error = e.args
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
current_app.logger.error('Unknown error')
|
|
||||||
raise
|
|
||||||
|
|
||||||
current_app.logger.info(f'Document added successfully for tenant {session["tenant"]["id"]}, '
|
|
||||||
f'Document Version {new_doc.id}')
|
|
||||||
|
|
||||||
upload_file_for_version(new_doc_vers, file, extension)
|
|
||||||
|
|
||||||
return new_doc, new_doc_vers
|
|
||||||
|
|
||||||
|
|
||||||
def log_session_state(session, msg=""):
|
def log_session_state(session, msg=""):
|
||||||
current_app.logger.debug(f"{msg} - Session dirty: {session.dirty}")
|
current_app.logger.debug(f"{msg} - Session dirty: {session.dirty}")
|
||||||
current_app.logger.debug(f"{msg} - Session new: {session.new}")
|
current_app.logger.debug(f"{msg} - Session new: {session.new}")
|
||||||
|
|
||||||
|
|
||||||
def create_document(form, filename):
|
|
||||||
new_doc = Document()
|
|
||||||
if form['name'] == '':
|
|
||||||
new_doc.name = filename.rsplit('.', 1)[0]
|
|
||||||
else:
|
|
||||||
new_doc.name = form['name']
|
|
||||||
|
|
||||||
if form['valid_from'] and form['valid_from'] != '':
|
|
||||||
new_doc.valid_from = form['valid_from']
|
|
||||||
else:
|
|
||||||
new_doc.valid_from = dt.now(tz.utc)
|
|
||||||
new_doc.tenant_id = session['tenant']['id']
|
|
||||||
set_logging_information(new_doc, dt.now(tz.utc))
|
|
||||||
|
|
||||||
return new_doc
|
|
||||||
|
|
||||||
|
|
||||||
def create_version_for_document(document, url, language, user_context):
|
|
||||||
new_doc_vers = DocumentVersion()
|
|
||||||
if url != '':
|
|
||||||
new_doc_vers.url = url
|
|
||||||
|
|
||||||
if language == '':
|
|
||||||
new_doc_vers.language = session['default_language']
|
|
||||||
else:
|
|
||||||
new_doc_vers.language = language
|
|
||||||
|
|
||||||
if user_context != '':
|
|
||||||
new_doc_vers.user_context = user_context
|
|
||||||
|
|
||||||
new_doc_vers.document = document
|
|
||||||
|
|
||||||
set_logging_information(new_doc_vers, dt.now(tz.utc))
|
|
||||||
|
|
||||||
return new_doc_vers
|
|
||||||
|
|
||||||
|
|
||||||
def upload_file_for_version(doc_vers, file, extension):
|
|
||||||
doc_vers.file_type = extension
|
|
||||||
doc_vers.file_name = doc_vers.calc_file_name()
|
|
||||||
doc_vers.file_location = doc_vers.calc_file_location()
|
|
||||||
|
|
||||||
# Normally, the tenant bucket should exist. But let's be on the safe side if a migration took place.
|
|
||||||
tenant_id = session['tenant']['id']
|
|
||||||
minio_client.create_tenant_bucket(tenant_id)
|
|
||||||
|
|
||||||
try:
|
|
||||||
minio_client.upload_document_file(
|
|
||||||
tenant_id,
|
|
||||||
doc_vers.doc_id,
|
|
||||||
doc_vers.language,
|
|
||||||
doc_vers.id,
|
|
||||||
doc_vers.file_name,
|
|
||||||
file
|
|
||||||
)
|
|
||||||
db.session.commit()
|
|
||||||
current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
|
|
||||||
f'document version {doc_vers.id} while uploading file.')
|
|
||||||
except S3Error as e:
|
|
||||||
db.session.rollback()
|
|
||||||
flash('Error saving document to MinIO.', 'error')
|
|
||||||
current_app.logger.error(
|
|
||||||
f'Error saving document to MinIO for tenant {tenant_id}: {e}')
|
|
||||||
raise
|
|
||||||
except SQLAlchemyError as e:
|
|
||||||
db.session.rollback()
|
|
||||||
flash('Error saving document metadata.', 'error')
|
|
||||||
current_app.logger.error(
|
|
||||||
f'Error saving document metadata for tenant {tenant_id}: {e}')
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_html(url):
|
def fetch_html(url):
|
||||||
# Fetches HTML content from a URL
|
# Fetches HTML content from a URL
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -15,7 +15,8 @@ from requests.exceptions import SSLError
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import io
|
import io
|
||||||
|
|
||||||
from common.models.interaction import ChatSession, Interaction
|
from common.models.document import Embedding, DocumentVersion
|
||||||
|
from common.models.interaction import ChatSession, Interaction, InteractionEmbedding
|
||||||
from common.extensions import db
|
from common.extensions import db
|
||||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm
|
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm
|
||||||
from common.utils.middleware import mw_before_request
|
from common.utils.middleware import mw_before_request
|
||||||
@@ -80,11 +81,34 @@ def handle_chat_session_selection():
|
|||||||
return redirect(prefixed_url_for('interaction_bp.chat_sessions'))
|
return redirect(prefixed_url_for('interaction_bp.chat_sessions'))
|
||||||
|
|
||||||
|
|
||||||
@interaction_bp.route('/view_chat_session/<chat_session_id>', methods=['GET'])
|
@interaction_bp.route('/view_chat_session/<int:chat_session_id>', methods=['GET'])
|
||||||
@roles_accepted('Super User', 'Tenant Admin')
|
@roles_accepted('Super User', 'Tenant Admin')
|
||||||
def view_chat_session(chat_session_id):
|
def view_chat_session(chat_session_id):
|
||||||
chat_session = ChatSession.query.get_or_404(chat_session_id)
|
chat_session = ChatSession.query.get_or_404(chat_session_id)
|
||||||
show_chat_session(chat_session)
|
interactions = (Interaction.query
|
||||||
|
.filter_by(chat_session_id=chat_session.id)
|
||||||
|
.order_by(Interaction.question_at)
|
||||||
|
.all())
|
||||||
|
|
||||||
|
# Fetch all related embeddings for the interactions in this session
|
||||||
|
embedding_query = (db.session.query(InteractionEmbedding.interaction_id,
|
||||||
|
DocumentVersion.url,
|
||||||
|
DocumentVersion.file_name)
|
||||||
|
.join(Embedding, InteractionEmbedding.embedding_id == Embedding.id)
|
||||||
|
.join(DocumentVersion, Embedding.doc_vers_id == DocumentVersion.id)
|
||||||
|
.filter(InteractionEmbedding.interaction_id.in_([i.id for i in interactions])))
|
||||||
|
|
||||||
|
# Create a dictionary to store embeddings for each interaction
|
||||||
|
embeddings_dict = {}
|
||||||
|
for interaction_id, url, file_name in embedding_query:
|
||||||
|
if interaction_id not in embeddings_dict:
|
||||||
|
embeddings_dict[interaction_id] = []
|
||||||
|
embeddings_dict[interaction_id].append({'url': url, 'file_name': file_name})
|
||||||
|
|
||||||
|
return render_template('interaction/view_chat_session.html',
|
||||||
|
chat_session=chat_session,
|
||||||
|
interactions=interactions,
|
||||||
|
embeddings_dict=embeddings_dict)
|
||||||
|
|
||||||
|
|
||||||
@interaction_bp.route('/view_chat_session_by_session_id/<session_id>', methods=['GET'])
|
@interaction_bp.route('/view_chat_session_by_session_id/<session_id>', methods=['GET'])
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ class TenantForm(FlaskForm):
|
|||||||
default='p, li')
|
default='p, li')
|
||||||
html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
|
html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
|
||||||
html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
|
html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
|
||||||
|
html_excluded_classes = StringField('HTML Excluded Classes', validators=[Optional()])
|
||||||
min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()], default=2000)
|
min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()], default=2000)
|
||||||
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()], default=3000)
|
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()], default=3000)
|
||||||
# Embedding Search variables
|
# Embedding Search variables
|
||||||
|
|||||||
@@ -68,6 +68,8 @@ def tenant():
|
|||||||
if form.html_included_elements.data else []
|
if form.html_included_elements.data else []
|
||||||
new_tenant.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
|
new_tenant.html_excluded_elements = [tag.strip() for tag in form.html_excluded_elements.data.split(',')] \
|
||||||
if form.html_excluded_elements.data else []
|
if form.html_excluded_elements.data else []
|
||||||
|
new_tenant.html_excluded_classes = [cls.strip() for cls in form.html_excluded_classes.data.split(',')] \
|
||||||
|
if form.html_excluded_classes.data else []
|
||||||
|
|
||||||
current_app.logger.debug(f'html_tags: {new_tenant.html_tags},'
|
current_app.logger.debug(f'html_tags: {new_tenant.html_tags},'
|
||||||
f'html_end_tags: {new_tenant.html_end_tags},'
|
f'html_end_tags: {new_tenant.html_end_tags},'
|
||||||
@@ -123,6 +125,8 @@ def edit_tenant(tenant_id):
|
|||||||
form.html_included_elements.data = ', '.join(tenant.html_included_elements)
|
form.html_included_elements.data = ', '.join(tenant.html_included_elements)
|
||||||
if tenant.html_excluded_elements:
|
if tenant.html_excluded_elements:
|
||||||
form.html_excluded_elements.data = ', '.join(tenant.html_excluded_elements)
|
form.html_excluded_elements.data = ', '.join(tenant.html_excluded_elements)
|
||||||
|
if tenant.html_excluded_classes:
|
||||||
|
form.html_excluded_classes.data = ', '.join(tenant.html_excluded_classes)
|
||||||
|
|
||||||
if form.validate_on_submit():
|
if form.validate_on_submit():
|
||||||
# Populate the tenant with form data
|
# Populate the tenant with form data
|
||||||
@@ -134,6 +138,8 @@ def edit_tenant(tenant_id):
|
|||||||
elem.strip()]
|
elem.strip()]
|
||||||
tenant.html_excluded_elements = [elem.strip() for elem in form.html_excluded_elements.data.split(',') if
|
tenant.html_excluded_elements = [elem.strip() for elem in form.html_excluded_elements.data.split(',') if
|
||||||
elem.strip()]
|
elem.strip()]
|
||||||
|
tenant.html_excluded_classes = [elem.strip() for elem in form.html_excluded_classes.data.split(',') if
|
||||||
|
elem.strip()]
|
||||||
|
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
flash('Tenant updated successfully.', 'success')
|
flash('Tenant updated successfully.', 'success')
|
||||||
@@ -429,6 +435,36 @@ def generate_chat_api_key():
|
|||||||
tenant.encrypted_chat_api_key = simple_encryption.encrypt_api_key(new_api_key)
|
tenant.encrypted_chat_api_key = simple_encryption.encrypt_api_key(new_api_key)
|
||||||
update_logging_information(tenant, dt.now(tz.utc))
|
update_logging_information(tenant, dt.now(tz.utc))
|
||||||
|
|
||||||
|
try:
|
||||||
|
db.session.add(tenant)
|
||||||
|
db.session.commit()
|
||||||
|
except SQLAlchemyError as e:
|
||||||
|
db.session.rollback()
|
||||||
|
current_app.logger.error(f'Unable to store chat api key for tenant {tenant.id}. Error: {str(e)}')
|
||||||
|
|
||||||
|
return jsonify({'api_key': new_api_key}), 200
|
||||||
|
|
||||||
|
|
||||||
|
@user_bp.route('/check_api_api_key', methods=['POST'])
|
||||||
|
@roles_accepted('Super User', 'Tenant Admin')
|
||||||
|
def check_api_api_key():
|
||||||
|
tenant_id = session['tenant']['id']
|
||||||
|
tenant = Tenant.query.get_or_404(tenant_id)
|
||||||
|
|
||||||
|
if tenant.encrypted_api_key:
|
||||||
|
return jsonify({'api_key_exists': True})
|
||||||
|
return jsonify({'api_key_exists': False})
|
||||||
|
|
||||||
|
|
||||||
|
@user_bp.route('/generate_api_api_key', methods=['POST'])
|
||||||
|
@roles_accepted('Super User', 'Tenant Admin')
|
||||||
|
def generate_api_api_key():
|
||||||
|
tenant = Tenant.query.get_or_404(session['tenant']['id'])
|
||||||
|
|
||||||
|
new_api_key = generate_api_key(prefix="EveAI-API")
|
||||||
|
tenant.encrypted_api_key = simple_encryption.encrypt_api_key(new_api_key)
|
||||||
|
update_logging_information(tenant, dt.now(tz.utc))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
db.session.add(tenant)
|
db.session.add(tenant)
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|||||||
187
eveai_workers/Processors/audio_processor.py
Normal file
187
eveai_workers/Processors/audio_processor.py
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
import io
|
||||||
|
import os
|
||||||
|
from pydub import AudioSegment
|
||||||
|
import tempfile
|
||||||
|
from langchain_core.output_parsers import StrOutputParser
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
|
from langchain_core.runnables import RunnablePassthrough
|
||||||
|
from common.extensions import minio_client
|
||||||
|
from common.utils.model_utils import create_language_template
|
||||||
|
from .processor import Processor
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
|
class AudioProcessor(Processor):
    """Convert an audio/video document into markdown.

    Pipeline: download the file from MinIO, compress it to a 64 kbit/s MP3
    with ffmpeg, transcribe it in fixed-length pieces through the configured
    transcription client, and let the configured LLM turn the transcript into
    markdown. The compressed audio and the raw transcript are uploaded to
    MinIO alongside the document version.
    """

    # Length of one transcription request, in milliseconds (10 minutes).
    SEGMENT_LENGTH_MS = 10 * 60 * 1000

    def __init__(self, tenant, model_variables, document_version):
        """Pick the transcription client and model out of ``model_variables``."""
        super().__init__(tenant, model_variables, document_version)
        self.transcription_client = model_variables['transcription_client']
        self.transcription_model = model_variables['transcription_model']
        # Executable name handed to subprocess; resolved through PATH.
        self.ffmpeg_path = 'ffmpeg'

    def process(self):
        """Run the complete audio pipeline.

        Returns:
            tuple: ``(markdown, title)``.

        Raises:
            Exception: any failure is logged at error level and re-raised.
        """
        self._log("Starting Audio processing")
        try:
            file_data = minio_client.download_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                self.document_version.file_name
            )

            compressed_audio = self._compress_audio(file_data)
            transcription = self._transcribe_audio(compressed_audio)
            markdown, title = self._generate_markdown_from_transcription(transcription)

            self._save_markdown(markdown)
            self._log("Finished processing Audio")
            return markdown, title
        except Exception as e:
            self._log(f"Error processing Audio: {str(e)}", level='error')
            raise

    def _compress_audio(self, audio_data):
        """Re-encode ``audio_data`` as 64 kbit/s MP3 and store the result in MinIO.

        Args:
            audio_data (bytes): raw content of the uploaded file.

        Returns:
            bytes: the compressed MP3 data.

        Raises:
            Exception: when ffmpeg exits with a non-zero status; the original
                ``CalledProcessError`` is attached as the cause.
        """
        self._log("Compressing audio")
        temp_input = tempfile.NamedTemporaryFile(
            delete=False, suffix=f'.{self.document_version.file_type}')
        input_path = temp_input.name

        # Use a unique filename for the output to avoid conflicts
        output_path = os.path.join(tempfile.gettempdir(), f'compressed_{os.urandom(8).hex()}.mp3')

        try:
            # Close the handle before ffmpeg reads the file: the data is then
            # fully on disk and the file can be reopened on every platform.
            with temp_input:
                temp_input.write(audio_data)

            subprocess.run(
                [self.ffmpeg_path, '-y', '-i', input_path, '-b:a', '64k', '-f', 'mp3', output_path],
                capture_output=True,
                text=True,
                check=True
            )

            with open(output_path, 'rb') as f:
                compressed_data = f.read()

            # Save compressed audio to MinIO
            compressed_filename = f"{self.document_version.id}_compressed.mp3"
            minio_client.upload_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                compressed_filename,
                compressed_data
            )
            self._log(f"Saved compressed audio to MinIO: {compressed_filename}")

            return compressed_data

        except subprocess.CalledProcessError as e:
            error_message = f"Compression failed: {e.stderr}"
            self._log(error_message, level='error')
            raise Exception(error_message) from e

        finally:
            # Clean up temporary files
            for path in (input_path, output_path):
                if os.path.exists(path):
                    os.unlink(path)

    def _transcribe_audio(self, audio_data):
        """Transcribe MP3 data piece by piece and store the transcript in MinIO.

        A piece whose transcription fails is logged and skipped, so the result
        may be partial. When nothing at all could be transcribed the
        placeholder text ``"No transcription available."`` is returned.

        Args:
            audio_data (bytes): MP3 data as produced by ``_compress_audio``.

        Returns:
            str: the transcripts of all pieces joined with single spaces.
        """
        self._log("Starting audio transcription")
        audio = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")

        segment_length = self.SEGMENT_LENGTH_MS
        # Ceiling division: an exact multiple of the segment length must not
        # be reported as having one extra chunk.
        total_chunks = -(-len(audio) // segment_length)
        transcriptions = []

        for i, chunk in enumerate(audio[::segment_length]):
            self._log(f'Processing chunk {i + 1} of {total_chunks}')
            transcriptions.append(self._transcribe_chunk(chunk))

        full_transcription = " ".join(filter(None, transcriptions))

        if not full_transcription:
            self._log("Warning: No transcription was generated", level='warning')
            full_transcription = "No transcription available."

        # Save transcription to MinIO
        transcription_filename = f"{self.document_version.id}_transcription.txt"
        minio_client.upload_document_file(
            self.tenant.id,
            self.document_version.doc_id,
            self.document_version.language,
            self.document_version.id,
            transcription_filename,
            full_transcription.encode('utf-8')
        )
        self._log(f"Saved transcription to MinIO: {transcription_filename}")

        return full_transcription

    def _transcribe_chunk(self, chunk):
        """Transcribe one audio chunk; return its text, or ``None`` if there is none."""
        # Reserve a path and release the handle immediately so the exporter
        # can write to it without a second open handle on the same file.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            chunk_path = temp_audio.name

        try:
            # export() hands back an open file object; close it right away.
            chunk.export(chunk_path, format="mp3").close()

            try:
                file_size = os.path.getsize(chunk_path)
                self._log(f"Temporary audio file size: {file_size} bytes")

                with open(chunk_path, 'rb') as audio_file:
                    file_start = audio_file.read(100)
                    self._log(f"First 100 bytes of audio file: {file_start}")
                    audio_file.seek(0)  # Reset file pointer to the beginning

                    self._log("Calling transcription API")
                    transcription = self.transcription_client.audio.transcriptions.create(
                        file=audio_file,
                        model=self.transcription_model,
                        language=self.document_version.language,
                        response_format='verbose_json',
                    )
                    self._log("Transcription API call completed")

                if not transcription:
                    self._log("Warning: Received empty transcription", level='warning')
                    return None

                # Handle the transcription result based on its type
                if isinstance(transcription, str):
                    self._log(f"Transcription result (string): {transcription[:100]}...")
                    return transcription
                if hasattr(transcription, 'text'):
                    self._log(
                        f"Transcription result (object with 'text' attribute): {transcription.text[:100]}...")
                    return transcription.text
                self._log(f"Transcription result (unknown type): {str(transcription)[:100]}...")
                return str(transcription)

            except Exception as e:
                # Best effort: a failing chunk is skipped, the rest continues.
                self._log(f"Error during transcription: {str(e)}", level='error')
                return None
        finally:
            os.unlink(chunk_path)

    def _generate_markdown_from_transcription(self, transcription):
        """Ask the LLM to turn ``transcription`` into markdown.

        Returns:
            tuple: ``(markdown, title)`` where the title is taken from the
            first level-1 heading of the generated markdown.
        """
        self._log("Generating markdown from transcription")
        llm = self.model_variables['llm']
        template = self.model_variables['transcript_template']
        language_template = create_language_template(template, self.document_version.language)
        transcript_prompt = ChatPromptTemplate.from_template(language_template)
        setup = RunnablePassthrough()
        output_parser = StrOutputParser()

        chain = setup | transcript_prompt | llm | output_parser

        input_transcript = {'transcript': transcription}
        markdown = chain.invoke(input_transcript)

        # Extract title from the markdown
        title = self._extract_title_from_markdown(markdown)

        return markdown, title

    def _extract_title_from_markdown(self, markdown):
        """Return the text of the first ``# `` heading, or a fixed fallback title."""
        for line in markdown.split('\n'):
            if line.startswith('# '):
                return line[2:].strip()
        return "Untitled Audio Transcription"
|
||||||
142
eveai_workers/Processors/html_processor.py
Normal file
142
eveai_workers/Processors/html_processor.py
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from langchain_core.output_parsers import StrOutputParser
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
|
from langchain_core.runnables import RunnablePassthrough
|
||||||
|
from common.extensions import db, minio_client
|
||||||
|
from common.utils.model_utils import create_language_template
|
||||||
|
from .processor import Processor
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLProcessor(Processor):
    """Convert an HTML document into markdown.

    The document is downloaded from MinIO, reduced to the configured tags
    (minus excluded elements and classes), split into size-bounded chunks and
    converted chunk by chunk through the configured LLM.
    """

    def __init__(self, tenant, model_variables, document_version):
        """Read the HTML selection settings from ``model_variables``."""
        super().__init__(tenant, model_variables, document_version)
        self.html_tags = model_variables['html_tags']
        self.html_end_tags = model_variables['html_end_tags']
        self.html_included_elements = model_variables['html_included_elements']
        self.html_excluded_elements = model_variables['html_excluded_elements']

    def process(self):
        """Run the complete HTML pipeline.

        Returns:
            tuple: ``(markdown, title)``; the title is the text of the
            document's ``<title>`` tag, or ``''`` when there is none.

        Raises:
            Exception: any failure is logged at error level and re-raised.
        """
        self._log("Starting HTML processing")
        try:
            file_data = minio_client.download_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                self.document_version.file_name
            )
            html_content = file_data.decode('utf-8')

            extracted_html, title = self._parse_html(html_content)
            markdown = self._generate_markdown_from_html(extracted_html)

            self._save_markdown(markdown)
            self._log("Finished processing HTML")
            return markdown, title
        except Exception as e:
            self._log(f"Error processing HTML: {str(e)}", level='error')
            raise

    def _parse_html(self, html_content):
        """Extract the wanted elements and the page title from ``html_content``.

        Returns:
            tuple: ``(extracted_html, title)`` where ``extracted_html`` is a
            flat sequence of ``<tag>text</tag>`` lines.
        """
        self._log(f'Parsing HTML for tenant {self.tenant.id}')
        soup = BeautifulSoup(html_content, 'html.parser')
        excluded_classes = self._parse_excluded_classes(self.tenant.html_excluded_classes)

        if self.html_included_elements:
            elements_to_parse = soup.find_all(self.html_included_elements)
        else:
            elements_to_parse = [soup]

        fragments = []
        for element in elements_to_parse:
            for sub_element in element.find_all(self.html_tags):
                if self._should_exclude_element(sub_element, excluded_classes):
                    continue
                fragments.append(self._extract_element_content(sub_element))
        extracted_html = ''.join(fragments)

        title_tag = soup.find('title')
        title = title_tag.get_text(strip=True) if title_tag else ''

        self._log(f'Finished parsing HTML for tenant {self.tenant.id}')
        return extracted_html, title

    def _generate_markdown_from_html(self, html_content):
        """Convert ``html_content`` to markdown, one LLM call per chunk.

        Returns:
            str: the markdown of all chunks joined by blank lines.
        """
        self._log(f'Generating markdown from HTML for tenant {self.tenant.id}')

        llm = self.model_variables['llm']
        template = self.model_variables['html_parse_template']
        parse_prompt = ChatPromptTemplate.from_template(template)
        setup = RunnablePassthrough()
        output_parser = StrOutputParser()
        chain = setup | parse_prompt | llm | output_parser

        soup = BeautifulSoup(html_content, 'lxml')
        chunks = self._split_content(soup)

        markdown_chunks = []
        for chunk in chunks:
            if self.embed_tuning:
                self._log(f'Processing chunk: \n{chunk}\n')
            input_html = {"html": chunk}
            markdown_chunk = chain.invoke(input_html)
            markdown_chunks.append(markdown_chunk)
            if self.embed_tuning:
                self._log(f'Processed markdown chunk: \n{markdown_chunk}\n')

        markdown = "\n\n".join(markdown_chunks)
        self._log(f'Finished generating markdown from HTML for tenant {self.tenant.id}')
        return markdown

    def _split_content(self, soup, max_size=20000):
        """Group elements of ``soup`` into HTML chunks of roughly ``max_size`` characters.

        A chunk is closed before an element that would push it over
        ``max_size``; a single oversized element still forms its own chunk.

        NOTE(review): only the tags listed below are collected. Tags that are
        in ``self.html_tags`` but not in this list look like they are dropped
        here - confirm against the configured tag set.

        Returns:
            list[str]: serialized HTML chunks in document order.
        """
        chunks = []
        current_chunk = []
        current_size = 0

        for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'table']):
            element_html = str(element)
            element_size = len(element_html)

            if current_size + element_size > max_size and current_chunk:
                chunks.append(''.join(map(str, current_chunk)))
                current_chunk = []
                current_size = 0

            current_chunk.append(element)
            current_size += element_size

            # An oversized chunk that ends on a major heading is flushed at once.
            if element.name in ['h1', 'h2', 'h3'] and current_size > max_size:
                chunks.append(''.join(map(str, current_chunk)))
                current_chunk = []
                current_size = 0

        if current_chunk:
            chunks.append(''.join(map(str, current_chunk)))

        return chunks

    def _parse_excluded_classes(self, excluded_classes):
        """Turn ``"element.class"`` rules into ``{element: {class, ...}}``.

        ``None`` or an empty collection yields an empty mapping. A rule
        without a dot or without a class part is ignored with a warning
        instead of aborting the whole document.

        Args:
            excluded_classes: iterable of rule strings, or ``None``.

        Returns:
            dict[str, set[str]]: class names to exclude, keyed by element name.
        """
        parsed = {}
        for rule in excluded_classes or []:
            element, separator, cls = rule.partition('.')
            if not separator or not cls:
                self._log(f"Ignoring malformed excluded class rule: {rule!r}", level='warning')
                continue
            parsed.setdefault(element, set()).add(cls)
        return parsed

    def _should_exclude_element(self, element, excluded_classes):
        """Return True when ``element`` sits inside an excluded element or class."""
        if self.html_excluded_elements and element.find_parent(self.html_excluded_elements):
            return True
        return self._is_element_excluded_by_class(element, excluded_classes)

    def _is_element_excluded_by_class(self, element, excluded_classes):
        """Return True when ``element`` or any of its ancestors matches an exclusion rule."""
        for parent in element.parents:
            if self._element_matches_exclusion(parent, excluded_classes):
                return True
        return self._element_matches_exclusion(element, excluded_classes)

    def _element_matches_exclusion(self, element, excluded_classes):
        """Return True when one of ``element``'s classes is excluded.

        Rules stored under the key ``'*'`` apply to every element name; other
        rules apply only to elements with the same name.
        """
        element_classes = element.get('class', [])
        if '*' in excluded_classes and any(cls in excluded_classes['*'] for cls in element_classes):
            return True
        return element.name in excluded_classes and \
            any(cls in excluded_classes[element.name] for cls in element_classes)

    def _extract_element_content(self, element):
        """Rebuild ``element`` as ``<name>text</name>`` without attributes or children.

        The text is escaped because it is re-parsed as HTML later on; a
        literal ``<`` or ``&`` in the page text must not turn into markup.
        """
        from html import escape  # local import: no new module-level dependency

        content = ' '.join(element.stripped_strings)
        return f'<{element.name}>{escape(content, quote=False)}</{element.name}>\n'
|
||||||
239
eveai_workers/Processors/pdf_processor.py
Normal file
239
eveai_workers/Processors/pdf_processor.py
Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
import io
|
||||||
|
import pdfplumber
|
||||||
|
from flask import current_app
|
||||||
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
|
from langchain_core.output_parsers import StrOutputParser
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
|
import re
|
||||||
|
from langchain_core.runnables import RunnablePassthrough
|
||||||
|
|
||||||
|
from common.extensions import minio_client
|
||||||
|
from common.utils.model_utils import create_language_template
|
||||||
|
from .processor import Processor
|
||||||
|
|
||||||
|
|
||||||
|
class PDFProcessor(Processor):
    """Convert a PDF document into markdown.

    Text and tables are extracted per page with pdfplumber, assembled into a
    rough markdown structure, split into chunks and cleaned up chunk by chunk
    through the configured LLM. Figure extraction is currently disabled.
    """

    # Markdown supports heading levels 1 to 6 only.
    _MAX_HEADING_LEVEL = 6

    def __init__(self, tenant, model_variables, document_version):
        """Read the PDF chunking settings from ``model_variables``."""
        super().__init__(tenant, model_variables, document_version)
        # PDF-specific initialization
        self.chunk_size = model_variables['PDF_chunk_size']
        self.chunk_overlap = model_variables['PDF_chunk_overlap']
        # Stored but not used by any method of this class.
        self.min_chunk_size = model_variables['PDF_min_chunk_size']
        self.max_chunk_size = model_variables['PDF_max_chunk_size']

    def process(self):
        """Run the complete PDF pipeline.

        Returns:
            tuple: ``(markdown, title)``.

        Raises:
            Exception: any failure is logged at error level and re-raised.
        """
        self._log("Starting PDF processing")
        try:
            file_data = minio_client.download_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                self.document_version.file_name
            )

            extracted_content = self._extract_content(file_data)
            structured_content, title = self._structure_content(extracted_content)

            llm_chunks = self._split_content_for_llm(structured_content)
            markdown = self._process_chunks_with_llm(llm_chunks)

            self._save_markdown(markdown)
            self._log("Finished processing PDF")
            return markdown, title
        except Exception as e:
            self._log(f"Error processing PDF: {str(e)}", level='error')
            raise

    def _extract_content(self, file_data):
        """Extract text, figures and tables from every page.

        Args:
            file_data (bytes): the raw PDF file.

        Returns:
            list[dict]: one dict per page with the keys ``'text'`` (str),
            ``'figures'`` (list) and ``'tables'`` (list of markdown tables).
        """
        extracted_content = []
        with pdfplumber.open(io.BytesIO(file_data)) as pdf:
            figure_counter = 1
            for page_num, page in enumerate(pdf.pages):
                self._log(f"Extracting content from page {page_num + 1}")
                page_content = {
                    # Guard against a page without text so later string
                    # handling never sees None.
                    'text': page.extract_text() or '',
                    'figures': self._extract_figures(page, page_num, figure_counter),
                    'tables': self._extract_tables(page)
                }
                if self.embed_tuning:
                    self._log(f'Extracted PDF Content for page {page_num + 1}')
                    self._log(f"{page_content }")
                figure_counter += len(page_content['figures'])
                extracted_content.append(page_content)

        return extracted_content

    def _extract_figures(self, page, page_num, figure_counter):
        """Return the figures found on ``page``.

        Figure processing is omitted for now, so this always returns an empty
        list; the parameters are kept so the call site stays unchanged.
        """
        return []

    def _find_figure_caption(self, page, bbox):
        """Return caption text found directly below ``bbox``, or ``None``.

        Only text in the 50 units below the figure that starts with "figure"
        (case-insensitive) counts as a caption. Errors are logged and result
        in ``None``.
        """
        try:
            # Look for text below the figure
            caption_bbox = (bbox[0], bbox[3], bbox[2], min(bbox[3] + 50, page.height))
            caption_text = page.crop(caption_bbox).extract_text()
            if caption_text and caption_text.lower().startswith('figure'):
                return caption_text
        except Exception as e:
            self._log(f"Error finding figure caption: {str(e)}", level='error')
        return None

    def _extract_tables(self, page):
        """Return the non-empty tables of ``page`` as markdown strings.

        Extraction errors are logged; the tables collected so far are returned.
        """
        tables = []
        try:
            for table in page.extract_tables():
                if table:
                    markdown_table = self._table_to_markdown(table)
                    if markdown_table:  # Only add non-empty tables
                        tables.append(markdown_table)
        except Exception as e:
            self._log(f"Error extracting tables from page: {str(e)}", level='error')
        return tables

    def _table_to_markdown(self, table):
        """Render ``table`` (a list of rows of cells) as a markdown table.

        The first row becomes the header. ``None`` cells become empty, pipe
        characters are escaped and line breaks inside a cell are replaced by
        spaces, since a markdown table row must stay on one line.

        Returns:
            str: the markdown table, or ``""`` for an empty table.
        """
        if not table or not table[0]:  # Check if table is empty or first row is empty
            return ""  # Return empty string for empty tables

        def clean_cell(cell):
            if cell is None:
                return ""  # Convert None to empty string
            single_line = " ".join(str(cell).splitlines())
            return single_line.replace("|", "\\|")  # Escape pipe characters

        header = [clean_cell(cell) for cell in table[0]]
        rows = [
            "| " + " | ".join(header) + " |\n",
            "| " + " | ".join(["---"] * len(header)) + " |\n",
        ]
        for row in table[1:]:
            rows.append("| " + " | ".join(clean_cell(cell) for cell in row) + " |\n")

        return "".join(rows)

    def _structure_content(self, extracted_content):
        """Assemble the per-page extraction into one rough markdown document.

        A paragraph consisting of a single line is treated as a heading; its
        depth follows its numbering ("2" and "2." are level 1, "2.1" is
        level 2, and so on). The title is the first non-empty line of the
        first page, else the first level-1 heading, else "Untitled Document".

        Args:
            extracted_content (list[dict]): output of ``_extract_content``.

        Returns:
            tuple: ``(structured_content, title)``.
        """
        parts = []
        title = "Untitled Document"
        title_found = False
        heading_pattern = re.compile(r'^(\d+(\.\d+)*\.?\s*)?(.+)$')

        def identify_heading(text):
            match = heading_pattern.match(text.strip())
            if not match:
                return 0, text  # Not a heading
            numbering, _, content = match.groups()
            if numbering:
                # A trailing dot or space is punctuation, not an extra level.
                level = numbering.strip().rstrip('.').count('.') + 1
                return min(level, self._MAX_HEADING_LEVEL), f"{numbering}{content}"
            return 1, content  # Assume it's a top-level heading if no numbering

        for page_index, page in enumerate(extracted_content):
            text = page.get('text') or ''

            # Assume the title is on the first page
            if page_index == 0:
                first_line = next((ln.strip() for ln in text.split('\n') if ln.strip()), None)
                if first_line:
                    title = first_line
                    title_found = True

            # Process text
            for para in text.split('\n\n'):
                lines = para.strip().split('\n')
                if len(lines) == 1:  # Potential heading
                    level, heading = identify_heading(lines[0])
                    if level > 0:
                        parts.append(f"\n\n{'#' * level} {heading}\n\n")
                        if level == 1 and not title_found:
                            # Use the first top-level heading as the title if not set
                            title = heading
                            title_found = True
                        continue
                parts.append(f"{para}\n\n")  # Normal or multi-line paragraph

            # Process figures
            for figure in page.get('figures', []):
                parts.append(f"\n\n![Figure {figure['figure_number']}]({figure['filename']})\n\n")
                if figure['caption']:
                    parts.append(f"*Figure {figure['figure_number']}: {figure['caption']}*\n\n")

            # Add tables
            for table in page.get('tables', []):
                parts.append(f"\n{table}\n")

        structured_content = "".join(parts)

        if self.embed_tuning:
            self._save_intermediate(structured_content, "structured_content.md")

        return structured_content, title

    def _split_content_for_llm(self, content):
        """Split ``content`` into chunks sized by ``chunk_size``/``chunk_overlap``."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
            length_function=len,
            separators=["\n\n", "\n", " ", ""]
        )
        return text_splitter.split_text(content)

    def _process_chunks_with_llm(self, chunks):
        """Send every chunk through the LLM and join the results.

        When the model wraps its answer in a ```` ```markdown ```` fence the
        wrapper is removed. The closing fence is only stripped together with
        the opening one, so an answer that merely ends with a code block
        keeps that block intact.

        Returns:
            str: the processed chunks joined by blank lines.
        """
        llm = self.model_variables['llm']
        template = self.model_variables['pdf_parse_template']
        pdf_prompt = ChatPromptTemplate.from_template(template)
        setup = RunnablePassthrough()
        output_parser = StrOutputParser()
        chain = setup | pdf_prompt | llm | output_parser

        opening_fence = "```markdown"
        markdown_chunks = []
        for chunk in chunks:
            result = chain.invoke({"pdf_content": chunk}).strip()
            # Remove Markdown code block delimiters if present
            if result.startswith(opening_fence):
                result = result[len(opening_fence):].strip()
                if result.endswith("```"):
                    result = result[:-3].strip()
            markdown_chunks.append(result)

        return "\n\n".join(markdown_chunks)
|
||||||
42
eveai_workers/Processors/processor.py
Normal file
42
eveai_workers/Processors/processor.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from flask import current_app
|
||||||
|
from common.extensions import minio_client
|
||||||
|
|
||||||
|
|
||||||
|
class Processor(ABC):
    """Abstract base for document processors.

    Keeps the tenant, the model variables and the document version being
    processed, and offers shared helpers for logging and for storing text
    artefacts in MinIO next to the document version.
    """

    def __init__(self, tenant, model_variables, document_version):
        """Remember the processing context; ``embed_tuning`` comes from ``model_variables``."""
        self.tenant = tenant
        self.model_variables = model_variables
        self.document_version = document_version
        self.embed_tuning = model_variables['embed_tuning']

    @abstractmethod
    def process(self):
        """Process the document version; implemented by each concrete processor."""
        pass

    def _upload_text(self, filename, text):
        """Upload ``text`` (UTF-8 encoded) under ``filename`` for this document version."""
        version = self.document_version
        minio_client.upload_document_file(
            self.tenant.id,
            version.doc_id,
            version.language,
            version.id,
            filename,
            text.encode('utf-8')
        )

    def _save_markdown(self, markdown):
        """Store the final markdown as ``<document_version.id>.md``."""
        self._upload_text(f"{self.document_version.id}.md", markdown)

    def _log(self, message, level='debug'):
        """Log ``message`` on the Flask app logger, prefixed with class, tenant and document."""
        prefix = f"{self.__class__.__name__} - Tenant {self.tenant.id}, Document {self.document_version.id}"
        emit = getattr(current_app.logger, level)
        emit(f"{prefix}: {message}")

    def _save_intermediate(self, content, filename):
        """Store an intermediate text artefact under the given ``filename``."""
        self._upload_text(filename, content)
|
||||||
80
eveai_workers/Processors/srt_processor.py
Normal file
80
eveai_workers/Processors/srt_processor.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
import re
|
||||||
|
from langchain_core.output_parsers import StrOutputParser
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
|
from langchain_core.runnables import RunnablePassthrough
|
||||||
|
from common.extensions import minio_client
|
||||||
|
from common.utils.model_utils import create_language_template
|
||||||
|
from .processor import Processor
|
||||||
|
|
||||||
|
|
||||||
|
class SRTProcessor(Processor):
    """Convert an SRT subtitle file into markdown.

    The subtitle file is downloaded from MinIO, stripped of cue numbers and
    timecodes, and the remaining text is turned into markdown by the
    configured LLM.
    """

    # Matches the "HH:MM:SS,mmm --> HH:MM:SS,mmm" line of a subtitle cue.
    _TIMECODE_PATTERN = re.compile(r'\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}')

    def __init__(self, tenant, model_variables, document_version):
        """Initialise the shared processor state; SRT needs no extra settings."""
        super().__init__(tenant, model_variables, document_version)

    def process(self):
        """Run the complete SRT pipeline.

        Returns:
            tuple: ``(markdown, title)``.

        Raises:
            Exception: any failure is logged at error level and re-raised.
        """
        self._log("Starting SRT processing")
        try:
            file_data = minio_client.download_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                self.document_version.file_name
            )

            # 'utf-8-sig' also accepts plain UTF-8 and drops a leading BOM,
            # which would otherwise stick to the first cue number.
            srt_content = file_data.decode('utf-8-sig')
            cleaned_transcription = self._clean_srt(srt_content)
            markdown, title = self._generate_markdown_from_transcription(cleaned_transcription)

            self._save_markdown(markdown)
            self._log("Finished processing SRT")
            return markdown, title
        except Exception as e:
            self._log(f"Error processing SRT: {str(e)}", level='error')
            raise

    def _clean_srt(self, srt_content):
        """Reduce SRT content to its spoken text.

        Empty lines, cue numbers (lines made of digits only) and timecode
        lines are dropped; the remaining lines are joined with single spaces.

        Args:
            srt_content (str): decoded content of the subtitle file.

        Returns:
            str: the subtitle text on one line with whitespace collapsed.
        """
        cleaned_lines = []
        for raw_line in srt_content.split('\n'):
            # Strip first so indentation or a stray BOM cannot hide a cue
            # number or timecode from the checks below.
            line = raw_line.strip().lstrip('\ufeff')
            if not line or line.isdigit() or self._TIMECODE_PATTERN.match(line):
                continue
            cleaned_lines.append(line)

        # Join the cleaned lines and remove any extra spaces
        return re.sub(r'\s+', ' ', ' '.join(cleaned_lines)).strip()

    def _generate_markdown_from_transcription(self, transcription):
        """Ask the LLM to turn ``transcription`` into markdown.

        Returns:
            tuple: ``(markdown, title)`` where the title is taken from the
            first level-1 heading of the generated markdown.
        """
        self._log("Generating markdown from transcription")
        llm = self.model_variables['llm']
        template = self.model_variables['transcript_template']
        language_template = create_language_template(template, self.document_version.language)
        transcript_prompt = ChatPromptTemplate.from_template(language_template)
        setup = RunnablePassthrough()
        output_parser = StrOutputParser()

        chain = setup | transcript_prompt | llm | output_parser

        input_transcript = {'transcript': transcription}
        markdown = chain.invoke(input_transcript)

        # Extract title from the markdown
        title = self._extract_title_from_markdown(markdown)

        return markdown, title

    def _extract_title_from_markdown(self, markdown):
        """Return the text of the first ``# `` heading, or a fixed fallback title."""
        for line in markdown.split('\n'):
            if line.startswith('# '):
                return line[2:].strip()
        return "Untitled SRT Transcription"
|
||||||
|
|
||||||
|
|
||||||
@@ -1,26 +1,16 @@
|
|||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
from datetime import datetime as dt, timezone as tz
|
from datetime import datetime as dt, timezone as tz
|
||||||
import subprocess
|
|
||||||
|
|
||||||
|
|
||||||
import gevent
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import html
|
|
||||||
from celery import states
|
from celery import states
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
# OpenAI imports
|
# OpenAI imports
|
||||||
from langchain.chains.summarize import load_summarize_chain
|
from langchain.text_splitter import MarkdownHeaderTextSplitter
|
||||||
from langchain.text_splitter import CharacterTextSplitter, MarkdownHeaderTextSplitter
|
|
||||||
from langchain_core.exceptions import LangChainException
|
from langchain_core.exceptions import LangChainException
|
||||||
from langchain_core.output_parsers import StrOutputParser
|
from langchain_core.output_parsers import StrOutputParser
|
||||||
from langchain_core.prompts import ChatPromptTemplate
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
from langchain_core.runnables import RunnablePassthrough
|
from langchain_core.runnables import RunnablePassthrough
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
from pytube import YouTube
|
|
||||||
import PyPDF2
|
|
||||||
from pydub import AudioSegment
|
|
||||||
import tempfile
|
|
||||||
|
|
||||||
from common.extensions import db, minio_client
|
from common.extensions import db, minio_client
|
||||||
from common.models.document import DocumentVersion, Embedding
|
from common.models.document import DocumentVersion, Embedding
|
||||||
@@ -29,6 +19,10 @@ from common.utils.celery_utils import current_celery
|
|||||||
from common.utils.database import Database
|
from common.utils.database import Database
|
||||||
from common.utils.model_utils import select_model_variables, create_language_template
|
from common.utils.model_utils import select_model_variables, create_language_template
|
||||||
from common.utils.os_utils import safe_remove, sync_folder
|
from common.utils.os_utils import safe_remove, sync_folder
|
||||||
|
from eveai_workers.Processors.audio_processor import AudioProcessor
|
||||||
|
from eveai_workers.Processors.html_processor import HTMLProcessor
|
||||||
|
from eveai_workers.Processors.pdf_processor import PDFProcessor
|
||||||
|
from eveai_workers.Processors.srt_processor import SRTProcessor
|
||||||
|
|
||||||
|
|
||||||
@current_celery.task(name='create_embeddings', queue='embeddings')
|
@current_celery.task(name='create_embeddings', queue='embeddings')
|
||||||
@@ -84,8 +78,10 @@ def create_embeddings(tenant_id, document_version_id):
|
|||||||
process_pdf(tenant, model_variables, document_version)
|
process_pdf(tenant, model_variables, document_version)
|
||||||
case 'html':
|
case 'html':
|
||||||
process_html(tenant, model_variables, document_version)
|
process_html(tenant, model_variables, document_version)
|
||||||
case 'youtube':
|
case 'srt':
|
||||||
process_youtube(tenant, model_variables, document_version)
|
process_srt(tenant, model_variables, document_version)
|
||||||
|
case 'mp4' | 'mp3' | 'ogg':
|
||||||
|
process_audio(tenant, model_variables, document_version)
|
||||||
case _:
|
case _:
|
||||||
raise Exception(f'No functionality defined for file type {document_version.file_type} '
|
raise Exception(f'No functionality defined for file type {document_version.file_type} '
|
||||||
f'for tenant {tenant_id} '
|
f'for tenant {tenant_id} '
|
||||||
@@ -103,49 +99,6 @@ def create_embeddings(tenant_id, document_version_id):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def process_pdf(tenant, model_variables, document_version):
|
|
||||||
file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language,
|
|
||||||
document_version.id, document_version.file_name)
|
|
||||||
|
|
||||||
pdf_text = ''
|
|
||||||
pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_data))
|
|
||||||
for page in pdf_reader.pages:
|
|
||||||
pdf_text += page.extract_text()
|
|
||||||
|
|
||||||
markdown = generate_markdown_from_pdf(tenant, model_variables, document_version, pdf_text)
|
|
||||||
markdown_file_name = f'{document_version.id}.md'
|
|
||||||
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
|
||||||
markdown_file_name, markdown.encode())
|
|
||||||
|
|
||||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
|
||||||
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
|
||||||
model_variables['max_chunk_size'])
|
|
||||||
|
|
||||||
if len(chunks) > 1:
|
|
||||||
summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
|
|
||||||
document_version.system_context = f'Summary: {summary}\n'
|
|
||||||
else:
|
|
||||||
document_version.system_context = ''
|
|
||||||
|
|
||||||
enriched_chunks = enrich_chunks(tenant, document_version, chunks)
|
|
||||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
|
||||||
|
|
||||||
try:
|
|
||||||
db.session.add(document_version)
|
|
||||||
document_version.processing_finished_at = dt.now(tz.utc)
|
|
||||||
document_version.processing = False
|
|
||||||
db.session.add_all(embeddings)
|
|
||||||
db.session.commit()
|
|
||||||
except SQLAlchemyError as e:
|
|
||||||
current_app.logger.error(f'Error saving embedding information for tenant {tenant.id} '
|
|
||||||
f'on HTML, document version {document_version.id}'
|
|
||||||
f'error: {e}')
|
|
||||||
raise
|
|
||||||
|
|
||||||
current_app.logger.info(f'Embeddings created successfully for tenant {tenant.id} '
|
|
||||||
f'on document version {document_version.id} :-)')
|
|
||||||
|
|
||||||
|
|
||||||
def delete_embeddings_for_document_version(document_version):
|
def delete_embeddings_for_document_version(document_version):
|
||||||
embeddings_to_delete = db.session.query(Embedding).filter_by(doc_vers_id=document_version.id).all()
|
embeddings_to_delete = db.session.query(Embedding).filter_by(doc_vers_id=document_version.id).all()
|
||||||
for embedding in embeddings_to_delete:
|
for embedding in embeddings_to_delete:
|
||||||
@@ -158,43 +111,53 @@ def delete_embeddings_for_document_version(document_version):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def process_pdf(tenant, model_variables, document_version):
|
||||||
|
processor = PDFProcessor(tenant, model_variables, document_version)
|
||||||
|
markdown, title = processor.process()
|
||||||
|
|
||||||
|
# Process markdown and embed
|
||||||
|
embed_markdown(tenant, model_variables, document_version, markdown, title)
|
||||||
|
|
||||||
|
|
||||||
def process_html(tenant, model_variables, document_version):
|
def process_html(tenant, model_variables, document_version):
|
||||||
file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language,
|
processor = HTMLProcessor(tenant, model_variables, document_version)
|
||||||
document_version.id, document_version.file_name)
|
markdown, title = processor.process()
|
||||||
html_content = file_data.decode('utf-8')
|
|
||||||
|
|
||||||
# The tags to be considered can be dependent on the tenant
|
# Process markdown and embed
|
||||||
html_tags = model_variables['html_tags']
|
embed_markdown(tenant, model_variables, document_version, markdown, title)
|
||||||
html_end_tags = model_variables['html_end_tags']
|
|
||||||
html_included_elements = model_variables['html_included_elements']
|
|
||||||
html_excluded_elements = model_variables['html_excluded_elements']
|
|
||||||
|
|
||||||
extracted_html, title = parse_html(tenant, html_content, html_tags, included_elements=html_included_elements,
|
|
||||||
excluded_elements=html_excluded_elements)
|
|
||||||
|
|
||||||
extracted_file_name = f'{document_version.id}-extracted.html'
|
def process_audio(tenant, model_variables, document_version):
|
||||||
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
processor = AudioProcessor(tenant, model_variables, document_version)
|
||||||
extracted_file_name, extracted_html.encode())
|
markdown, title = processor.process()
|
||||||
|
|
||||||
markdown = generate_markdown_from_html(tenant, model_variables, document_version, extracted_html)
|
# Process markdown and embed
|
||||||
markdown_file_name = f'{document_version.id}.md'
|
embed_markdown(tenant, model_variables, document_version, markdown, title)
|
||||||
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language, document_version.id,
|
|
||||||
markdown_file_name, markdown.encode())
|
|
||||||
|
|
||||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
|
||||||
|
def process_srt(tenant, model_variables, document_version):
|
||||||
|
processor = SRTProcessor(tenant, model_variables, document_version)
|
||||||
|
markdown, title = processor.process()
|
||||||
|
|
||||||
|
# Process markdown and embed
|
||||||
|
embed_markdown(tenant, model_variables, document_version, markdown, title)
|
||||||
|
|
||||||
|
|
||||||
|
def embed_markdown(tenant, model_variables, document_version, markdown, title):
|
||||||
|
# Create potential chunks
|
||||||
|
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, f"{document_version.id}.md")
|
||||||
|
|
||||||
|
# Combine chunks for embedding
|
||||||
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||||
model_variables['max_chunk_size'])
|
model_variables['max_chunk_size'])
|
||||||
|
|
||||||
if len(chunks) > 1:
|
# Enrich chunks
|
||||||
summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
|
enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
|
||||||
document_version.system_context = (f'Title: {title}\n'
|
|
||||||
f'Summary: {summary}\n')
|
|
||||||
else:
|
|
||||||
document_version.system_context = (f'Title: {title}\n')
|
|
||||||
|
|
||||||
enriched_chunks = enrich_chunks(tenant, document_version, title, chunks)
|
# Create embeddings
|
||||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||||
|
|
||||||
|
# Update document version and save embeddings
|
||||||
try:
|
try:
|
||||||
db.session.add(document_version)
|
db.session.add(document_version)
|
||||||
document_version.processing_finished_at = dt.now(tz.utc)
|
document_version.processing_finished_at = dt.now(tz.utc)
|
||||||
@@ -211,12 +174,18 @@ def process_html(tenant, model_variables, document_version):
|
|||||||
f'on document version {document_version.id} :-)')
|
f'on document version {document_version.id} :-)')
|
||||||
|
|
||||||
|
|
||||||
def enrich_chunks(tenant, document_version, title, chunks):
|
def enrich_chunks(tenant, model_variables, document_version, title, chunks):
|
||||||
current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
|
current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
|
||||||
f'on document version {document_version.id}')
|
f'on document version {document_version.id}')
|
||||||
current_app.logger.debug(f'Nr of chunks: {len(chunks)}')
|
|
||||||
|
summary = ''
|
||||||
|
if len(chunks) > 1:
|
||||||
|
summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
|
||||||
|
|
||||||
chunk_total_context = (f'Filename: {document_version.file_name}\n'
|
chunk_total_context = (f'Filename: {document_version.file_name}\n'
|
||||||
f'User Context:\n{document_version.user_context}\n\n'
|
f'User Context:\n{document_version.user_context}\n\n'
|
||||||
|
f'Title: {title}\n'
|
||||||
|
f'{summary}\n'
|
||||||
f'{document_version.system_context}\n\n')
|
f'{document_version.system_context}\n\n')
|
||||||
enriched_chunks = []
|
enriched_chunks = []
|
||||||
initial_chunk = (f'Filename: {document_version.file_name}\n'
|
initial_chunk = (f'Filename: {document_version.file_name}\n'
|
||||||
@@ -235,40 +204,6 @@ def enrich_chunks(tenant, document_version, title, chunks):
|
|||||||
return enriched_chunks
|
return enriched_chunks
|
||||||
|
|
||||||
|
|
||||||
def generate_markdown_from_html(tenant, model_variables, document_version, html_content):
|
|
||||||
current_app.logger.debug(f'Generating Markdown from HTML for tenant {tenant.id} '
|
|
||||||
f'on document version {document_version.id}')
|
|
||||||
llm = model_variables['llm']
|
|
||||||
template = model_variables['html_parse_template']
|
|
||||||
parse_prompt = ChatPromptTemplate.from_template(template)
|
|
||||||
setup = RunnablePassthrough()
|
|
||||||
output_parser = StrOutputParser()
|
|
||||||
|
|
||||||
chain = setup | parse_prompt | llm | output_parser
|
|
||||||
input_html = {"html": html_content}
|
|
||||||
|
|
||||||
markdown = chain.invoke(input_html)
|
|
||||||
|
|
||||||
return markdown
|
|
||||||
|
|
||||||
|
|
||||||
def generate_markdown_from_pdf(tenant, model_variables, document_version, pdf_content):
|
|
||||||
current_app.logger.debug(f'Generating Markdown from PDF for tenant {tenant.id} '
|
|
||||||
f'on document version {document_version.id}')
|
|
||||||
llm = model_variables['llm']
|
|
||||||
template = model_variables['pdf_parse_template']
|
|
||||||
parse_prompt = ChatPromptTemplate.from_template(template)
|
|
||||||
setup = RunnablePassthrough()
|
|
||||||
output_parser = StrOutputParser()
|
|
||||||
|
|
||||||
chain = setup | parse_prompt | llm | output_parser
|
|
||||||
input_pdf = {"pdf_content": pdf_content}
|
|
||||||
|
|
||||||
markdown = chain.invoke(input_pdf)
|
|
||||||
|
|
||||||
return markdown
|
|
||||||
|
|
||||||
|
|
||||||
def summarize_chunk(tenant, model_variables, document_version, chunk):
|
def summarize_chunk(tenant, model_variables, document_version, chunk):
|
||||||
current_app.logger.debug(f'Summarizing chunk for tenant {tenant.id} '
|
current_app.logger.debug(f'Summarizing chunk for tenant {tenant.id} '
|
||||||
f'on document version {document_version.id}')
|
f'on document version {document_version.id}')
|
||||||
@@ -323,274 +258,252 @@ def embed_chunks(tenant, model_variables, document_version, chunks):
|
|||||||
return new_embeddings
|
return new_embeddings
|
||||||
|
|
||||||
|
|
||||||
def parse_html(tenant, html_content, tags, included_elements=None, excluded_elements=None):
|
def log_parsing_info(tenant, tags, included_elements, excluded_elements, excluded_classes, elements_to_parse):
|
||||||
soup = BeautifulSoup(html_content, 'html.parser')
|
|
||||||
extracted_html = ''
|
|
||||||
|
|
||||||
if included_elements:
|
|
||||||
elements_to_parse = soup.find_all(included_elements)
|
|
||||||
else:
|
|
||||||
elements_to_parse = [soup] # parse the entire document if no included_elements specified
|
|
||||||
|
|
||||||
if tenant.embed_tuning:
|
if tenant.embed_tuning:
|
||||||
current_app.embed_tuning_logger.debug(f'Tags to parse: {tags}')
|
current_app.embed_tuning_logger.debug(f'Tags to parse: {tags}')
|
||||||
current_app.embed_tuning_logger.debug(f'Included Elements: {included_elements}')
|
current_app.embed_tuning_logger.debug(f'Included Elements: {included_elements}')
|
||||||
current_app.embed_tuning_logger.debug(f'Included Elements: {len(included_elements)}')
|
|
||||||
current_app.embed_tuning_logger.debug(f'Excluded Elements: {excluded_elements}')
|
current_app.embed_tuning_logger.debug(f'Excluded Elements: {excluded_elements}')
|
||||||
|
current_app.embed_tuning_logger.debug(f'Excluded Classes: {excluded_classes}')
|
||||||
current_app.embed_tuning_logger.debug(f'Found {len(elements_to_parse)} elements to parse')
|
current_app.embed_tuning_logger.debug(f'Found {len(elements_to_parse)} elements to parse')
|
||||||
current_app.embed_tuning_logger.debug(f'First element to parse: {elements_to_parse[0]}')
|
current_app.embed_tuning_logger.debug(f'First element to parse: {elements_to_parse[0]}')
|
||||||
|
|
||||||
# Iterate through the found included elements
|
|
||||||
for element in elements_to_parse:
|
|
||||||
# Find all specified tags within each included element
|
|
||||||
for sub_element in element.find_all(tags):
|
|
||||||
if tenant.embed_tuning:
|
|
||||||
current_app.embed_tuning_logger.debug(f'Found element: {sub_element.name}')
|
|
||||||
if excluded_elements and sub_element.find_parent(excluded_elements):
|
|
||||||
continue # Skip this sub_element if it's within any of the excluded_elements
|
|
||||||
extracted_html += f'<{sub_element.name}>{sub_element.get_text(strip=True)}</{sub_element.name}>\n'
|
|
||||||
|
|
||||||
title = soup.find('title').get_text(strip=True)
|
# def process_youtube(tenant, model_variables, document_version):
|
||||||
|
# download_file_name = f'{document_version.id}.mp4'
|
||||||
return extracted_html, title
|
# compressed_file_name = f'{document_version.id}.mp3'
|
||||||
|
# transcription_file_name = f'{document_version.id}.txt'
|
||||||
|
# markdown_file_name = f'{document_version.id}.md'
|
||||||
def process_youtube(tenant, model_variables, document_version):
|
#
|
||||||
base_path = os.path.join(current_app.config['UPLOAD_FOLDER'],
|
# # Remove existing files (in case of a re-processing of the file
|
||||||
document_version.file_location)
|
# minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||||
download_file_name = f'{document_version.id}.mp4'
|
# document_version.id, download_file_name)
|
||||||
compressed_file_name = f'{document_version.id}.mp3'
|
# minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||||
transcription_file_name = f'{document_version.id}.txt'
|
# document_version.id, compressed_file_name)
|
||||||
markdown_file_name = f'{document_version.id}.md'
|
# minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||||
|
# document_version.id, transcription_file_name)
|
||||||
# Remove existing files (in case of a re-processing of the file
|
# minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||||
minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
# document_version.id, markdown_file_name)
|
||||||
document_version.id, download_file_name)
|
#
|
||||||
minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
# of, title, description, author = download_youtube(document_version.url, tenant.id, document_version,
|
||||||
document_version.id, compressed_file_name)
|
# download_file_name)
|
||||||
minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
# document_version.system_context = f'Title: {title}\nDescription: {description}\nAuthor: {author}'
|
||||||
document_version.id, transcription_file_name)
|
# compress_audio(tenant.id, document_version, download_file_name, compressed_file_name)
|
||||||
minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
# transcribe_audio(tenant.id, document_version, compressed_file_name, transcription_file_name, model_variables)
|
||||||
document_version.id, markdown_file_name)
|
# annotate_transcription(tenant, document_version, transcription_file_name, markdown_file_name, model_variables)
|
||||||
|
#
|
||||||
of, title, description, author = download_youtube(document_version.url, tenant.id, document_version,
|
# potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||||
download_file_name)
|
# actual_chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||||
document_version.system_context = f'Title: {title}\nDescription: {description}\nAuthor: {author}'
|
# model_variables['max_chunk_size'])
|
||||||
compress_audio(tenant.id, document_version, download_file_name, compressed_file_name)
|
#
|
||||||
transcribe_audio(tenant.id, document_version, compressed_file_name, transcription_file_name, model_variables)
|
# enriched_chunks = enrich_chunks(tenant, document_version, actual_chunks)
|
||||||
annotate_transcription(tenant, document_version, transcription_file_name, markdown_file_name, model_variables)
|
# embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||||
|
#
|
||||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
# try:
|
||||||
actual_chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
# db.session.add(document_version)
|
||||||
model_variables['max_chunk_size'])
|
# document_version.processing_finished_at = dt.now(tz.utc)
|
||||||
|
# document_version.processing = False
|
||||||
enriched_chunks = enrich_chunks(tenant, document_version, actual_chunks)
|
# db.session.add_all(embeddings)
|
||||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
# db.session.commit()
|
||||||
|
# except SQLAlchemyError as e:
|
||||||
try:
|
# current_app.logger.error(f'Error saving embedding information for tenant {tenant.id} '
|
||||||
db.session.add(document_version)
|
# f'on Youtube document version {document_version.id}'
|
||||||
document_version.processing_finished_at = dt.now(tz.utc)
|
# f'error: {e}')
|
||||||
document_version.processing = False
|
# raise
|
||||||
db.session.add_all(embeddings)
|
#
|
||||||
db.session.commit()
|
# current_app.logger.info(f'Embeddings created successfully for tenant {tenant.id} '
|
||||||
except SQLAlchemyError as e:
|
# f'on Youtube document version {document_version.id} :-)')
|
||||||
current_app.logger.error(f'Error saving embedding information for tenant {tenant.id} '
|
#
|
||||||
f'on Youtube document version {document_version.id}'
|
#
|
||||||
f'error: {e}')
|
# def download_youtube(url, tenant_id, document_version, file_name):
|
||||||
raise
|
# try:
|
||||||
|
# current_app.logger.info(f'Downloading YouTube video: {url} for tenant: {tenant_id}')
|
||||||
current_app.logger.info(f'Embeddings created successfully for tenant {tenant.id} '
|
# yt = YouTube(url)
|
||||||
f'on Youtube document version {document_version.id} :-)')
|
# stream = yt.streams.get_audio_only()
|
||||||
|
#
|
||||||
|
# with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
||||||
def download_youtube(url, tenant_id, document_version, file_name):
|
# stream.download(output_path=temp_file.name)
|
||||||
try:
|
# with open(temp_file.name, 'rb') as f:
|
||||||
current_app.logger.info(f'Downloading YouTube video: {url} for tenant: {tenant_id}')
|
# file_data = f.read()
|
||||||
yt = YouTube(url)
|
#
|
||||||
stream = yt.streams.get_audio_only()
|
# minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||||
|
# document_version.id,
|
||||||
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
# file_name, file_data)
|
||||||
stream.download(output_path=temp_file.name)
|
#
|
||||||
with open(temp_file.name, 'rb') as f:
|
# current_app.logger.info(f'Downloaded YouTube video: {url} for tenant: {tenant_id}')
|
||||||
file_data = f.read()
|
# return file_name, yt.title, yt.description, yt.author
|
||||||
|
# except Exception as e:
|
||||||
minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, document_version.id,
|
# current_app.logger.error(f'Error downloading YouTube video: {url} for tenant: {tenant_id} with error: {e}')
|
||||||
file_name, file_data)
|
# raise
|
||||||
|
#
|
||||||
current_app.logger.info(f'Downloaded YouTube video: {url} for tenant: {tenant_id}')
|
#
|
||||||
return file_name, yt.title, yt.description, yt.author
|
# def compress_audio(tenant_id, document_version, input_file, output_file):
|
||||||
except Exception as e:
|
# try:
|
||||||
current_app.logger.error(f'Error downloading YouTube video: {url} for tenant: {tenant_id} with error: {e}')
|
# current_app.logger.info(f'Compressing audio for tenant: {tenant_id}')
|
||||||
raise
|
#
|
||||||
|
# input_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||||
|
# document_version.id, input_file)
|
||||||
def compress_audio(tenant_id, document_version, input_file, output_file):
|
#
|
||||||
try:
|
# with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input:
|
||||||
current_app.logger.info(f'Compressing audio for tenant: {tenant_id}')
|
# temp_input.write(input_data)
|
||||||
|
# temp_input.flush()
|
||||||
input_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
|
#
|
||||||
document_version.id, input_file)
|
# with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_output:
|
||||||
|
# result = subprocess.run(
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input:
|
# ['ffmpeg', '-i', temp_input.name, '-b:a', '64k', '-f', 'mp3', temp_output.name],
|
||||||
temp_input.write(input_data)
|
# capture_output=True,
|
||||||
temp_input.flush()
|
# text=True
|
||||||
|
# )
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_output:
|
#
|
||||||
result = subprocess.run(
|
# if result.returncode != 0:
|
||||||
['ffmpeg', '-i', temp_input.name, '-b:a', '64k', '-f', 'mp3', temp_output.name],
|
# raise Exception(f"Compression failed: {result.stderr}")
|
||||||
capture_output=True,
|
#
|
||||||
text=True
|
# with open(temp_output.name, 'rb') as f:
|
||||||
)
|
# compressed_data = f.read()
|
||||||
|
#
|
||||||
if result.returncode != 0:
|
# minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||||
raise Exception(f"Compression failed: {result.stderr}")
|
# document_version.id,
|
||||||
|
# output_file, compressed_data)
|
||||||
with open(temp_output.name, 'rb') as f:
|
#
|
||||||
compressed_data = f.read()
|
# current_app.logger.info(f'Compressed audio for tenant: {tenant_id}')
|
||||||
|
# except Exception as e:
|
||||||
minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language, document_version.id,
|
# current_app.logger.error(f'Error compressing audio for tenant: {tenant_id} with error: {e}')
|
||||||
output_file, compressed_data)
|
# raise
|
||||||
|
#
|
||||||
current_app.logger.info(f'Compressed audio for tenant: {tenant_id}')
|
#
|
||||||
except Exception as e:
|
# def transcribe_audio(tenant_id, document_version, input_file, output_file, model_variables):
|
||||||
current_app.logger.error(f'Error compressing audio for tenant: {tenant_id} with error: {e}')
|
# try:
|
||||||
raise
|
# current_app.logger.info(f'Transcribing audio for tenant: {tenant_id}')
|
||||||
|
# client = model_variables['transcription_client']
|
||||||
|
# model = model_variables['transcription_model']
|
||||||
def transcribe_audio(tenant_id, document_version, input_file, output_file, model_variables):
|
#
|
||||||
try:
|
# # Download the audio file from MinIO
|
||||||
current_app.logger.info(f'Transcribing audio for tenant: {tenant_id}')
|
# audio_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||||
client = model_variables['transcription_client']
|
# document_version.id, input_file)
|
||||||
model = model_variables['transcription_model']
|
#
|
||||||
|
# # Load the audio data into pydub
|
||||||
# Download the audio file from MinIO
|
# audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
|
||||||
audio_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
|
#
|
||||||
document_version.id, input_file)
|
# # Define segment length (e.g., 10 minutes)
|
||||||
|
# segment_length = 10 * 60 * 1000 # 10 minutes in milliseconds
|
||||||
# Load the audio data into pydub
|
#
|
||||||
audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
|
# transcriptions = []
|
||||||
|
#
|
||||||
# Define segment length (e.g., 10 minutes)
|
# # Split audio into segments and transcribe each
|
||||||
segment_length = 10 * 60 * 1000 # 10 minutes in milliseconds
|
# for i, chunk in enumerate(audio[::segment_length]):
|
||||||
|
# current_app.logger.debug(f'Transcribing chunk {i + 1} of {len(audio) // segment_length + 1}')
|
||||||
transcriptions = []
|
#
|
||||||
|
# with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
|
||||||
# Split audio into segments and transcribe each
|
# chunk.export(temp_audio.name, format="mp3")
|
||||||
for i, chunk in enumerate(audio[::segment_length]):
|
#
|
||||||
current_app.logger.debug(f'Transcribing chunk {i + 1} of {len(audio) // segment_length + 1}')
|
# with open(temp_audio.name, 'rb') as audio_segment:
|
||||||
|
# transcription = client.audio.transcriptions.create(
|
||||||
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
|
# file=audio_segment,
|
||||||
chunk.export(temp_audio.name, format="mp3")
|
# model=model,
|
||||||
|
# language=document_version.language,
|
||||||
with open(temp_audio.name, 'rb') as audio_segment:
|
# response_format='verbose_json',
|
||||||
transcription = client.audio.transcriptions.create(
|
# )
|
||||||
file=audio_segment,
|
#
|
||||||
model=model,
|
# transcriptions.append(transcription.text)
|
||||||
language=document_version.language,
|
#
|
||||||
response_format='verbose_json',
|
# os.unlink(temp_audio.name) # Delete the temporary file
|
||||||
)
|
#
|
||||||
|
# # Combine all transcriptions
|
||||||
transcriptions.append(transcription.text)
|
# full_transcription = " ".join(transcriptions)
|
||||||
|
#
|
||||||
os.unlink(temp_audio.name) # Delete the temporary file
|
# # Upload the full transcription to MinIO
|
||||||
|
# minio_client.upload_document_file(
|
||||||
# Combine all transcriptions
|
# tenant_id,
|
||||||
full_transcription = " ".join(transcriptions)
|
# document_version.doc_id,
|
||||||
|
# document_version.language,
|
||||||
# Upload the full transcription to MinIO
|
# document_version.id,
|
||||||
minio_client.upload_document_file(
|
# output_file,
|
||||||
tenant_id,
|
# full_transcription.encode('utf-8')
|
||||||
document_version.doc_id,
|
# )
|
||||||
document_version.language,
|
#
|
||||||
document_version.id,
|
# current_app.logger.info(f'Transcribed audio for tenant: {tenant_id}')
|
||||||
output_file,
|
# except Exception as e:
|
||||||
full_transcription.encode('utf-8')
|
# current_app.logger.error(f'Error transcribing audio for tenant: {tenant_id}, with error: {e}')
|
||||||
)
|
# raise
|
||||||
|
#
|
||||||
current_app.logger.info(f'Transcribed audio for tenant: {tenant_id}')
|
#
|
||||||
except Exception as e:
|
# def annotate_transcription(tenant, document_version, input_file, output_file, model_variables):
|
||||||
current_app.logger.error(f'Error transcribing audio for tenant: {tenant_id}, with error: {e}')
|
# try:
|
||||||
raise
|
# current_app.logger.debug(f'Annotating transcription for tenant {tenant.id}')
|
||||||
|
#
|
||||||
|
# char_splitter = CharacterTextSplitter(separator='.',
|
||||||
def annotate_transcription(tenant, document_version, input_file, output_file, model_variables):
|
# chunk_size=model_variables['annotation_chunk_length'],
|
||||||
try:
|
# chunk_overlap=0)
|
||||||
current_app.logger.debug(f'Annotating transcription for tenant {tenant.id}')
|
#
|
||||||
|
# headers_to_split_on = [
|
||||||
char_splitter = CharacterTextSplitter(separator='.',
|
# ("#", "Header 1"),
|
||||||
chunk_size=model_variables['annotation_chunk_length'],
|
# ("##", "Header 2"),
|
||||||
chunk_overlap=0)
|
# ]
|
||||||
|
# markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)
|
||||||
headers_to_split_on = [
|
#
|
||||||
("#", "Header 1"),
|
# llm = model_variables['llm']
|
||||||
("##", "Header 2"),
|
# template = model_variables['transcript_template']
|
||||||
]
|
# language_template = create_language_template(template, document_version.language)
|
||||||
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)
|
# transcript_prompt = ChatPromptTemplate.from_template(language_template)
|
||||||
|
# setup = RunnablePassthrough()
|
||||||
llm = model_variables['llm']
|
# output_parser = StrOutputParser()
|
||||||
template = model_variables['transcript_template']
|
#
|
||||||
language_template = create_language_template(template, document_version.language)
|
# # Download the transcription file from MinIO
|
||||||
transcript_prompt = ChatPromptTemplate.from_template(language_template)
|
# transcript_data = minio_client.download_document_file(tenant.id, document_version.doc_id,
|
||||||
setup = RunnablePassthrough()
|
# document_version.language, document_version.id,
|
||||||
output_parser = StrOutputParser()
|
# input_file)
|
||||||
|
# transcript = transcript_data.decode('utf-8')
|
||||||
# Download the transcription file from MinIO
|
#
|
||||||
transcript_data = minio_client.download_document_file(tenant.id, document_version.doc_id,
|
# chain = setup | transcript_prompt | llm | output_parser
|
||||||
document_version.language, document_version.id,
|
#
|
||||||
input_file)
|
# chunks = char_splitter.split_text(transcript)
|
||||||
transcript = transcript_data.decode('utf-8')
|
# all_markdown_chunks = []
|
||||||
|
# last_markdown_chunk = ''
|
||||||
chain = setup | transcript_prompt | llm | output_parser
|
# for chunk in chunks:
|
||||||
|
# current_app.logger.debug(f'Annotating next chunk of {len(chunks)} for tenant {tenant.id}')
|
||||||
chunks = char_splitter.split_text(transcript)
|
# full_input = last_markdown_chunk + '\n' + chunk
|
||||||
all_markdown_chunks = []
|
# if tenant.embed_tuning:
|
||||||
last_markdown_chunk = ''
|
# current_app.embed_tuning_logger.debug(f'Annotating chunk: \n '
|
||||||
for chunk in chunks:
|
# f'------------------\n'
|
||||||
current_app.logger.debug(f'Annotating next chunk of {len(chunks)} for tenant {tenant.id}')
|
# f'{full_input}\n'
|
||||||
full_input = last_markdown_chunk + '\n' + chunk
|
# f'------------------\n')
|
||||||
if tenant.embed_tuning:
|
# input_transcript = {'transcript': full_input}
|
||||||
current_app.embed_tuning_logger.debug(f'Annotating chunk: \n '
|
# markdown = chain.invoke(input_transcript)
|
||||||
f'------------------\n'
|
# # GPT-4o returns some kind of content description: ```markdown <text> ```
|
||||||
f'{full_input}\n'
|
# if markdown.startswith("```markdown"):
|
||||||
f'------------------\n')
|
# markdown = "\n".join(markdown.strip().split("\n")[1:-1])
|
||||||
input_transcript = {'transcript': full_input}
|
# if tenant.embed_tuning:
|
||||||
markdown = chain.invoke(input_transcript)
|
# current_app.embed_tuning_logger.debug(f'Markdown Received: \n '
|
||||||
# GPT-4o returns some kind of content description: ```markdown <text> ```
|
# f'------------------\n'
|
||||||
if markdown.startswith("```markdown"):
|
# f'{markdown}\n'
|
||||||
markdown = "\n".join(markdown.strip().split("\n")[1:-1])
|
# f'------------------\n')
|
||||||
if tenant.embed_tuning:
|
# md_header_splits = markdown_splitter.split_text(markdown)
|
||||||
current_app.embed_tuning_logger.debug(f'Markdown Received: \n '
|
# markdown_chunks = [doc.page_content for doc in md_header_splits]
|
||||||
f'------------------\n'
|
# # claude-3.5-sonnet returns introductory text
|
||||||
f'{markdown}\n'
|
# if not markdown_chunks[0].startswith('#'):
|
||||||
f'------------------\n')
|
# markdown_chunks.pop(0)
|
||||||
md_header_splits = markdown_splitter.split_text(markdown)
|
# last_markdown_chunk = markdown_chunks[-1]
|
||||||
markdown_chunks = [doc.page_content for doc in md_header_splits]
|
# last_markdown_chunk = "\n".join(markdown.strip().split("\n")[1:])
|
||||||
# claude-3.5-sonnet returns introductory text
|
# markdown_chunks.pop()
|
||||||
if not markdown_chunks[0].startswith('#'):
|
# all_markdown_chunks += markdown_chunks
|
||||||
markdown_chunks.pop(0)
|
#
|
||||||
last_markdown_chunk = markdown_chunks[-1]
|
# all_markdown_chunks += [last_markdown_chunk]
|
||||||
last_markdown_chunk = "\n".join(markdown.strip().split("\n")[1:])
|
#
|
||||||
markdown_chunks.pop()
|
# annotated_transcript = '\n'.join(all_markdown_chunks)
|
||||||
all_markdown_chunks += markdown_chunks
|
#
|
||||||
|
# # Upload the annotated transcript to MinIO
|
||||||
all_markdown_chunks += [last_markdown_chunk]
|
# minio_client.upload_document_file(
|
||||||
|
# tenant.id,
|
||||||
annotated_transcript = '\n'.join(all_markdown_chunks)
|
# document_version.doc_id,
|
||||||
|
# document_version.language,
|
||||||
# Upload the annotated transcript to MinIO
|
# document_version.id,
|
||||||
minio_client.upload_document_file(
|
# output_file,
|
||||||
tenant.id,
|
# annotated_transcript.encode('utf-8')
|
||||||
document_version.doc_id,
|
# )
|
||||||
document_version.language,
|
#
|
||||||
document_version.id,
|
# current_app.logger.info(f'Annotated transcription for tenant {tenant.id}')
|
||||||
output_file,
|
# except Exception as e:
|
||||||
annotated_transcript.encode('utf-8')
|
# current_app.logger.error(f'Error annotating transcription for tenant {tenant.id}, with error: {e}')
|
||||||
)
|
# raise
|
||||||
|
|
||||||
current_app.logger.info(f'Annotated transcription for tenant {tenant.id}')
|
|
||||||
except Exception as e:
|
|
||||||
current_app.logger.error(f'Error annotating transcription for tenant {tenant.id}, with error: {e}')
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
def create_potential_chunks_for_markdown(tenant_id, document_version, input_file):
|
def create_potential_chunks_for_markdown(tenant_id, document_version, input_file):
|
||||||
@@ -648,7 +561,3 @@ def combine_chunks_for_markdown(potential_chunks, min_chars, max_chars):
|
|||||||
actual_chunks.append(current_chunk)
|
actual_chunks.append(current_chunk)
|
||||||
|
|
||||||
return actual_chunks
|
return actual_chunks
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@@ -3,7 +3,7 @@
|
|||||||
Plugin Name: EveAI Chat Widget
|
Plugin Name: EveAI Chat Widget
|
||||||
Plugin URI: https://askeveai.com/
|
Plugin URI: https://askeveai.com/
|
||||||
Description: Integrates the EveAI chat interface into your WordPress site.
|
Description: Integrates the EveAI chat interface into your WordPress site.
|
||||||
Version: 1.3.21
|
Version: 1.5.0
|
||||||
Author: Josako, Pieter Laroy
|
Author: Josako, Pieter Laroy
|
||||||
Author URI: https://askeveai.com/about/
|
Author URI: https://askeveai.com/about/
|
||||||
*/
|
*/
|
||||||
@@ -18,15 +18,30 @@ function eveai_chat_enqueue_scripts() {
|
|||||||
wp_enqueue_style('eveai-chat-style', plugin_dir_url(__FILE__) . 'css/eveai-chat-style.css');
|
wp_enqueue_style('eveai-chat-style', plugin_dir_url(__FILE__) . 'css/eveai-chat-style.css');
|
||||||
}
|
}
|
||||||
add_action('wp_enqueue_scripts', 'eveai_chat_enqueue_scripts');
|
add_action('wp_enqueue_scripts', 'eveai_chat_enqueue_scripts');
|
||||||
|
add_action('admin_enqueue_scripts', 'eveai_chat_enqueue_scripts');
|
||||||
|
|
||||||
// Shortcode function
|
// Shortcode function
|
||||||
function eveai_chat_shortcode($atts) {
|
function eveai_chat_shortcode($atts) {
|
||||||
$options = get_option('eveai_chat_options');
|
// Default values
|
||||||
$tenant_id = esc_js($options['tenant_id']);
|
$defaults = array(
|
||||||
$api_key = esc_js($options['api_key']);
|
'tenant_id' => '',
|
||||||
$domain = esc_js($options['domain']);
|
'api_key' => '',
|
||||||
$language = esc_js($options['language']);
|
'domain' => '',
|
||||||
$supported_languages = esc_js($options['supported_languages']);
|
'language' => 'en',
|
||||||
|
'supported_languages' => 'en,fr,de,es',
|
||||||
|
'server_url' => 'https://evie.askeveai.com'
|
||||||
|
);
|
||||||
|
|
||||||
|
// Merge provided attributes with defaults
|
||||||
|
$atts = shortcode_atts($defaults, $atts, 'eveai_chat');
|
||||||
|
|
||||||
|
// Sanitize inputs
|
||||||
|
$tenant_id = sanitize_text_field($atts['tenant_id']);
|
||||||
|
$api_key = sanitize_text_field($atts['api_key']);
|
||||||
|
$domain = esc_url_raw($atts['domain']);
|
||||||
|
$language = sanitize_text_field($atts['language']);
|
||||||
|
$supported_languages = sanitize_text_field($atts['supported_languages']);
|
||||||
|
$server_url = esc_url_raw($atts['server_url']);
|
||||||
|
|
||||||
// Generate a unique ID for this instance of the chat widget
|
// Generate a unique ID for this instance of the chat widget
|
||||||
$chat_id = 'chat-container-' . uniqid();
|
$chat_id = 'chat-container-' . uniqid();
|
||||||
@@ -39,7 +54,8 @@ function eveai_chat_shortcode($atts) {
|
|||||||
'$api_key',
|
'$api_key',
|
||||||
'$domain',
|
'$domain',
|
||||||
'$language',
|
'$language',
|
||||||
'$supported_languages'
|
'$supported_languages',
|
||||||
|
'$server_url'
|
||||||
);
|
);
|
||||||
eveAI.initializeChat('$chat_id');
|
eveAI.initializeChat('$chat_id');
|
||||||
});
|
});
|
||||||
@@ -49,80 +65,3 @@ function eveai_chat_shortcode($atts) {
|
|||||||
}
|
}
|
||||||
add_shortcode('eveai_chat', 'eveai_chat_shortcode');
|
add_shortcode('eveai_chat', 'eveai_chat_shortcode');
|
||||||
|
|
||||||
// Add admin menu
|
|
||||||
function eveai_chat_admin_menu() {
|
|
||||||
add_options_page('EveAI Chat Settings', 'EveAI Chat', 'manage_options', 'eveai-chat-settings', 'eveai_chat_settings_page');
|
|
||||||
}
|
|
||||||
add_action('admin_menu', 'eveai_chat_admin_menu');
|
|
||||||
|
|
||||||
// Settings page
|
|
||||||
function eveai_chat_settings_page() {
|
|
||||||
?>
|
|
||||||
<div class="wrap">
|
|
||||||
<h1>EveAI Chat Settings</h1>
|
|
||||||
<form method="post" action="options.php">
|
|
||||||
<?php
|
|
||||||
settings_fields('eveai_chat_options');
|
|
||||||
do_settings_sections('eveai-chat-settings');
|
|
||||||
submit_button();
|
|
||||||
?>
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
<?php
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register settings
|
|
||||||
function eveai_chat_register_settings() {
|
|
||||||
register_setting('eveai_chat_options', 'eveai_chat_options', 'eveai_chat_options_validate');
|
|
||||||
|
|
||||||
add_settings_section('eveai_chat_main', 'Main Settings', 'eveai_chat_section_text', 'eveai-chat-settings');
|
|
||||||
|
|
||||||
add_settings_field('eveai_chat_tenant_id', 'Tenant ID', 'eveai_chat_tenant_id_input', 'eveai-chat-settings', 'eveai_chat_main');
|
|
||||||
add_settings_field('eveai_chat_api_key', 'API Key', 'eveai_chat_api_key_input', 'eveai-chat-settings', 'eveai_chat_main');
|
|
||||||
add_settings_field('eveai_chat_domain', 'Domain', 'eveai_chat_domain_input', 'eveai-chat-settings', 'eveai_chat_main');
|
|
||||||
add_settings_field('eveai_chat_language', 'Default Language', 'eveai_chat_language_input', 'eveai-chat-settings', 'eveai_chat_main');
|
|
||||||
add_settings_field('eveai_chat_supported_languages', 'Supported Languages', 'eveai_chat_supported_languages_input', 'eveai-chat-settings', 'eveai_chat_main');
|
|
||||||
}
|
|
||||||
add_action('admin_init', 'eveai_chat_register_settings');
|
|
||||||
|
|
||||||
function eveai_chat_section_text() {
|
|
||||||
echo '<p>Enter your EveAI Chat configuration details below:</p>';
|
|
||||||
}
|
|
||||||
|
|
||||||
function eveai_chat_tenant_id_input() {
|
|
||||||
$options = get_option('eveai_chat_options');
|
|
||||||
echo "<input id='eveai_chat_tenant_id' name='eveai_chat_options[tenant_id]' type='text' value='" . esc_attr($options['tenant_id']) . "' />";
|
|
||||||
}
|
|
||||||
|
|
||||||
function eveai_chat_api_key_input() {
|
|
||||||
$options = get_option('eveai_chat_options');
|
|
||||||
echo "<input id='eveai_chat_api_key' name='eveai_chat_options[api_key]' type='password' value='" . esc_attr($options['api_key']) . "' />";
|
|
||||||
}
|
|
||||||
|
|
||||||
function eveai_chat_domain_input() {
|
|
||||||
$options = get_option('eveai_chat_options');
|
|
||||||
echo "<input id='eveai_chat_domain' name='eveai_chat_options[domain]' type='text' value='" . esc_attr($options['domain']) . "' />";
|
|
||||||
}
|
|
||||||
|
|
||||||
function eveai_chat_language_input() {
|
|
||||||
$options = get_option('eveai_chat_options');
|
|
||||||
echo "<input id='eveai_chat_language' name='eveai_chat_options[language]' type='text' value='" . esc_attr($options['language']) . "' />";
|
|
||||||
}
|
|
||||||
|
|
||||||
function eveai_chat_supported_languages_input() {
|
|
||||||
$options = get_option('eveai_chat_options');
|
|
||||||
$supported_languages = isset($options['supported_languages']) ? $options['supported_languages'] : 'en,fr,de,es';
|
|
||||||
echo "<input id='eveai_chat_supported_languages' name='eveai_chat_options[supported_languages]' type='text' value='" . esc_attr($supported_languages) . "' />";
|
|
||||||
echo "<p class='description'>Enter comma-separated language codes (e.g., en,fr,de,es)</p>";
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
function eveai_chat_options_validate($input) {
|
|
||||||
$new_input = array();
|
|
||||||
$new_input['tenant_id'] = sanitize_text_field($input['tenant_id']);
|
|
||||||
$new_input['api_key'] = sanitize_text_field($input['api_key']);
|
|
||||||
$new_input['domain'] = esc_url_raw($input['domain']);
|
|
||||||
$new_input['language'] = sanitize_text_field($input['language']);
|
|
||||||
$new_input['supported_languages'] = sanitize_text_field($input['supported_languages']);
|
|
||||||
return $new_input;
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
class EveAIChatWidget extends HTMLElement {
|
class EveAIChatWidget extends HTMLElement {
|
||||||
static get observedAttributes() {
|
static get observedAttributes() {
|
||||||
return ['tenant-id', 'api-key', 'domain', 'language', 'languages'];
|
return ['tenant-id', 'api-key', 'domain', 'language', 'languages', 'server-url'];
|
||||||
}
|
}
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
@@ -87,6 +87,7 @@ class EveAIChatWidget extends HTMLElement {
|
|||||||
this.language = this.getAttribute('language');
|
this.language = this.getAttribute('language');
|
||||||
const languageAttr = this.getAttribute('languages');
|
const languageAttr = this.getAttribute('languages');
|
||||||
this.languages = languageAttr ? languageAttr.split(',') : [];
|
this.languages = languageAttr ? languageAttr.split(',') : [];
|
||||||
|
this.serverUrl = this.getAttribute('server-url');
|
||||||
this.currentLanguage = this.language;
|
this.currentLanguage = this.language;
|
||||||
console.log('Updated attributes:', {
|
console.log('Updated attributes:', {
|
||||||
tenantId: this.tenantId,
|
tenantId: this.tenantId,
|
||||||
@@ -94,7 +95,8 @@ class EveAIChatWidget extends HTMLElement {
|
|||||||
domain: this.domain,
|
domain: this.domain,
|
||||||
language: this.language,
|
language: this.language,
|
||||||
currentLanguage: this.currentLanguage,
|
currentLanguage: this.currentLanguage,
|
||||||
languages: this.languages
|
languages: this.languages,
|
||||||
|
serverUrl: this.serverUrl
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -104,14 +106,16 @@ class EveAIChatWidget extends HTMLElement {
|
|||||||
const domain = this.getAttribute('domain');
|
const domain = this.getAttribute('domain');
|
||||||
const language = this.getAttribute('language');
|
const language = this.getAttribute('language');
|
||||||
const languages = this.getAttribute('languages');
|
const languages = this.getAttribute('languages');
|
||||||
|
const serverUrl = this.getAttribute('server-url');
|
||||||
console.log('Checking if all attributes are set:', {
|
console.log('Checking if all attributes are set:', {
|
||||||
tenantId,
|
tenantId,
|
||||||
apiKey,
|
apiKey,
|
||||||
domain,
|
domain,
|
||||||
language,
|
language,
|
||||||
languages
|
languages,
|
||||||
|
serverUrl
|
||||||
});
|
});
|
||||||
return tenantId && apiKey && domain && language && languages;
|
return tenantId && apiKey && domain && language && languages && serverUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
createLanguageDropdown() {
|
createLanguageDropdown() {
|
||||||
@@ -142,7 +146,7 @@ class EveAIChatWidget extends HTMLElement {
|
|||||||
console.log(`Initializing socket connection to Evie`);
|
console.log(`Initializing socket connection to Evie`);
|
||||||
|
|
||||||
// Ensure apiKey is passed in the query parameters
|
// Ensure apiKey is passed in the query parameters
|
||||||
this.socket = io('https://evie.askeveai.com', {
|
this.socket = io(this.serverUrl, {
|
||||||
path: '/chat/socket.io/',
|
path: '/chat/socket.io/',
|
||||||
transports: ['websocket', 'polling'],
|
transports: ['websocket', 'polling'],
|
||||||
query: {
|
query: {
|
||||||
@@ -161,24 +165,32 @@ class EveAIChatWidget extends HTMLElement {
|
|||||||
|
|
||||||
this.socket.on('connect', (data) => {
|
this.socket.on('connect', (data) => {
|
||||||
console.log('Socket connected OK');
|
console.log('Socket connected OK');
|
||||||
|
console.log('Connect event data:', data);
|
||||||
|
console.log('Connect event this:', this);
|
||||||
this.setStatusMessage('Connected to EveAI.');
|
this.setStatusMessage('Connected to EveAI.');
|
||||||
this.updateConnectionStatus(true);
|
this.updateConnectionStatus(true);
|
||||||
this.startHeartbeat();
|
this.startHeartbeat();
|
||||||
if (data.room) {
|
if (data && data.room) {
|
||||||
this.room = data.room;
|
this.room = data.room;
|
||||||
console.log(`Joined room: ${this.room}`);
|
console.log(`Joined room: ${this.room}`);
|
||||||
|
} else {
|
||||||
|
console.log('Room information not received on connect');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this.socket.on('authenticated', (data) => {
|
this.socket.on('authenticated', (data) => {
|
||||||
console.log('Authenticated event received: ', data);
|
console.log('Authenticated event received');
|
||||||
|
console.log('Authentication event data:', data);
|
||||||
|
console.log('Authentication event this:', this);
|
||||||
this.setStatusMessage('Authenticated.');
|
this.setStatusMessage('Authenticated.');
|
||||||
if (data.token) {
|
if (data && data.token) {
|
||||||
this.jwtToken = data.token; // Store the JWT token received from the server
|
this.jwtToken = data.token;
|
||||||
}
|
}
|
||||||
if (data.room) {
|
if (data && data.room) {
|
||||||
this.room = data.room;
|
this.room = data.room;
|
||||||
console.log(`Confirmed room: ${this.room}`);
|
console.log(`Confirmed room: ${this.room}`);
|
||||||
|
} else {
|
||||||
|
console.log('Room information not received on authentication');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
// static/js/eveai-sdk.js
|
// static/js/eveai-sdk.js
|
||||||
class EveAI {
|
class EveAI {
|
||||||
constructor(tenantId, apiKey, domain, language, languages) {
|
constructor(tenantId, apiKey, domain, language, languages, serverUrl) {
|
||||||
this.tenantId = tenantId;
|
this.tenantId = tenantId;
|
||||||
this.apiKey = apiKey;
|
this.apiKey = apiKey;
|
||||||
this.domain = domain;
|
this.domain = domain;
|
||||||
this.language = language;
|
this.language = language;
|
||||||
this.languages = languages;
|
this.languages = languages;
|
||||||
|
this.serverUrl = serverUrl;
|
||||||
|
|
||||||
console.log('EveAI constructor:', { tenantId, apiKey, domain, language, languages });
|
console.log('EveAI constructor:', { tenantId, apiKey, domain, language, languages, serverUrl });
|
||||||
}
|
}
|
||||||
|
|
||||||
initializeChat(containerId) {
|
initializeChat(containerId) {
|
||||||
@@ -21,6 +22,7 @@ class EveAI {
|
|||||||
chatWidget.setAttribute('domain', this.domain);
|
chatWidget.setAttribute('domain', this.domain);
|
||||||
chatWidget.setAttribute('language', this.language);
|
chatWidget.setAttribute('language', this.language);
|
||||||
chatWidget.setAttribute('languages', this.languages);
|
chatWidget.setAttribute('languages', this.languages);
|
||||||
|
chatWidget.setAttribute('server-url', this.serverUrl);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
console.error('Container not found');
|
console.error('Container not found');
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ Contributors: Josako
|
|||||||
Tags: chat, ai
|
Tags: chat, ai
|
||||||
Requires at least: 5.0
|
Requires at least: 5.0
|
||||||
Tested up to: 5.9
|
Tested up to: 5.9
|
||||||
Stable tag: 1.3.0
|
Stable tag: 1.5.0
|
||||||
License: GPLv2 or later
|
License: GPLv2 or later
|
||||||
License URI: http://www.gnu.org/licenses/gpl-2.0.html
|
License URI: http://www.gnu.org/licenses/gpl-2.0.html
|
||||||
|
|
||||||
@@ -17,7 +17,18 @@ This plugin allows you to easily add the EveAI chat widget to your WordPress sit
|
|||||||
|
|
||||||
1. Upload the `eveai-chat-widget` folder to the `/wp-content/plugins/` directory
|
1. Upload the `eveai-chat-widget` folder to the `/wp-content/plugins/` directory
|
||||||
2. Activate the plugin through the 'Plugins' menu in WordPress
|
2. Activate the plugin through the 'Plugins' menu in WordPress
|
||||||
3. Go to Settings > EveAI Chat to configure your chat widget parameters
|
3. Add the EveAI Chat Widget to a page or post using the shortcode described in the Usage section below.
|
||||||
|
|
||||||
|
== Usage ==
|
||||||
|
|
||||||
|
To add an EveAI Chat Widget to your page or post, use the following shortcode:
|
||||||
|
|
||||||
|
[eveai_chat tenant_id="YOUR_TENANT_ID" api_key="YOUR_API_KEY" domain="YOUR_DOMAIN" language="LANGUAGE_CODE" supported_languages="COMMA_SEPARATED_LANGUAGE_CODES" server_url="EVIE_SERVER_URL"]
|
||||||
|
|
||||||
|
Example:
|
||||||
|
[eveai_chat tenant_id="123456" api_key="your_api_key_here" domain="https://your-domain.com" language="en" supported_languages="en,fr,de,es" server_url="https://evie.askeveai.com"]
|
||||||
|
|
||||||
|
You can add multiple chat widgets with different configurations by using the shortcode multiple times with different parameters.
|
||||||
|
|
||||||
== Frequently Asked Questions ==
|
== Frequently Asked Questions ==
|
||||||
|
|
||||||
@@ -27,6 +38,16 @@ Contact your EveAI service provider to obtain your Tenant ID, API Key, and Domai
|
|||||||
|
|
||||||
== Changelog ==
|
== Changelog ==
|
||||||
|
|
||||||
|
= 1.5.0 =
|
||||||
|
* Allow for multiple servers to serve Evie
|
||||||
|
|
||||||
|
= 1.4.1 - 1.4.x =
|
||||||
|
* Bug fixes
|
||||||
|
|
||||||
|
= 1.4.0 =
|
||||||
|
* Allow for multiple instances of Evie on the same website
|
||||||
|
* Parametrization of the shortcode
|
||||||
|
|
||||||
= 1.3.3 - =
|
= 1.3.3 - =
|
||||||
* ensure all attributes (also height and supportedLanguages) are set before initializing the socket
|
* ensure all attributes (also height and supportedLanguages) are set before initializing the socket
|
||||||
* Bugfixing
|
* Bugfixing
|
||||||
|
|||||||
@@ -0,0 +1,68 @@
|
|||||||
|
"""Include excluded_classes for Tenant
|
||||||
|
|
||||||
|
Revision ID: 110be45f6e44
|
||||||
|
Revises: 229774547fed
|
||||||
|
Create Date: 2024-08-22 07:37:40.591220
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '110be45f6e44'
|
||||||
|
down_revision = '229774547fed'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
with op.batch_alter_table('roles_users', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint('roles_users_user_id_fkey', type_='foreignkey')
|
||||||
|
batch_op.drop_constraint('roles_users_role_id_fkey', type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key(None, 'role', ['role_id'], ['id'], referent_schema='public', ondelete='CASCADE')
|
||||||
|
batch_op.create_foreign_key(None, 'user', ['user_id'], ['id'], referent_schema='public', ondelete='CASCADE')
|
||||||
|
|
||||||
|
with op.batch_alter_table('tenant', schema=None) as batch_op:
|
||||||
|
batch_op.add_column(sa.Column('html_excluded_classes', postgresql.ARRAY(sa.String(length=200)), nullable=True))
|
||||||
|
|
||||||
|
with op.batch_alter_table('tenant_domain', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint('tenant_domain_updated_by_fkey', type_='foreignkey')
|
||||||
|
batch_op.drop_constraint('tenant_domain_tenant_id_fkey', type_='foreignkey')
|
||||||
|
batch_op.drop_constraint('tenant_domain_created_by_fkey', type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key(None, 'user', ['updated_by'], ['id'], referent_schema='public')
|
||||||
|
batch_op.create_foreign_key(None, 'tenant', ['tenant_id'], ['id'], referent_schema='public')
|
||||||
|
batch_op.create_foreign_key(None, 'user', ['created_by'], ['id'], referent_schema='public')
|
||||||
|
|
||||||
|
with op.batch_alter_table('user', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint('user_tenant_id_fkey', type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key(None, 'tenant', ['tenant_id'], ['id'], referent_schema='public')
|
||||||
|
|
||||||
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
with op.batch_alter_table('user', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key('user_tenant_id_fkey', 'tenant', ['tenant_id'], ['id'])
|
||||||
|
|
||||||
|
with op.batch_alter_table('tenant_domain', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key('tenant_domain_created_by_fkey', 'user', ['created_by'], ['id'])
|
||||||
|
batch_op.create_foreign_key('tenant_domain_tenant_id_fkey', 'tenant', ['tenant_id'], ['id'])
|
||||||
|
batch_op.create_foreign_key('tenant_domain_updated_by_fkey', 'user', ['updated_by'], ['id'])
|
||||||
|
|
||||||
|
with op.batch_alter_table('tenant', schema=None) as batch_op:
|
||||||
|
batch_op.drop_column('html_excluded_classes')
|
||||||
|
|
||||||
|
with op.batch_alter_table('roles_users', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key('roles_users_role_id_fkey', 'role', ['role_id'], ['id'], ondelete='CASCADE')
|
||||||
|
batch_op.create_foreign_key('roles_users_user_id_fkey', 'user', ['user_id'], ['id'], ondelete='CASCADE')
|
||||||
|
|
||||||
|
# ### end Alembic commands ###
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
"""Add API Key to tenant - next to Chat API key
|
||||||
|
|
||||||
|
Revision ID: ce6f5b62bbfb
|
||||||
|
Revises: 110be45f6e44
|
||||||
|
Create Date: 2024-08-29 07:43:20.662983
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = 'ce6f5b62bbfb'
|
||||||
|
down_revision = '110be45f6e44'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
with op.batch_alter_table('roles_users', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint('roles_users_role_id_fkey', type_='foreignkey')
|
||||||
|
batch_op.drop_constraint('roles_users_user_id_fkey', type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key(None, 'user', ['user_id'], ['id'], referent_schema='public', ondelete='CASCADE')
|
||||||
|
batch_op.create_foreign_key(None, 'role', ['role_id'], ['id'], referent_schema='public', ondelete='CASCADE')
|
||||||
|
|
||||||
|
with op.batch_alter_table('tenant', schema=None) as batch_op:
|
||||||
|
batch_op.add_column(sa.Column('encrypted_api_key', sa.String(length=500), nullable=True))
|
||||||
|
|
||||||
|
with op.batch_alter_table('tenant_domain', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint('tenant_domain_tenant_id_fkey', type_='foreignkey')
|
||||||
|
batch_op.drop_constraint('tenant_domain_updated_by_fkey', type_='foreignkey')
|
||||||
|
batch_op.drop_constraint('tenant_domain_created_by_fkey', type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key(None, 'user', ['updated_by'], ['id'], referent_schema='public')
|
||||||
|
batch_op.create_foreign_key(None, 'user', ['created_by'], ['id'], referent_schema='public')
|
||||||
|
batch_op.create_foreign_key(None, 'tenant', ['tenant_id'], ['id'], referent_schema='public')
|
||||||
|
|
||||||
|
with op.batch_alter_table('user', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint('user_tenant_id_fkey', type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key(None, 'tenant', ['tenant_id'], ['id'], referent_schema='public')
|
||||||
|
|
||||||
|
# ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
|
||||||
|
# ### commands auto generated by Alembic - please adjust! ###
|
||||||
|
with op.batch_alter_table('user', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key('user_tenant_id_fkey', 'tenant', ['tenant_id'], ['id'])
|
||||||
|
|
||||||
|
with op.batch_alter_table('tenant_domain', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key('tenant_domain_created_by_fkey', 'user', ['created_by'], ['id'])
|
||||||
|
batch_op.create_foreign_key('tenant_domain_updated_by_fkey', 'user', ['updated_by'], ['id'])
|
||||||
|
batch_op.create_foreign_key('tenant_domain_tenant_id_fkey', 'tenant', ['tenant_id'], ['id'])
|
||||||
|
|
||||||
|
with op.batch_alter_table('tenant', schema=None) as batch_op:
|
||||||
|
batch_op.drop_column('encrypted_api_key')
|
||||||
|
|
||||||
|
with op.batch_alter_table('roles_users', schema=None) as batch_op:
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.drop_constraint(None, type_='foreignkey')
|
||||||
|
batch_op.create_foreign_key('roles_users_user_id_fkey', 'user', ['user_id'], ['id'], ondelete='CASCADE')
|
||||||
|
batch_op.create_foreign_key('roles_users_role_id_fkey', 'role', ['role_id'], ['id'], ondelete='CASCADE')
|
||||||
|
|
||||||
|
# ### end Alembic commands ###
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
from logging.config import fileConfig
|
from logging.config import fileConfig
|
||||||
|
|
||||||
@@ -5,12 +6,12 @@ from flask import current_app
|
|||||||
|
|
||||||
from alembic import context
|
from alembic import context
|
||||||
from sqlalchemy import NullPool, engine_from_config, text
|
from sqlalchemy import NullPool, engine_from_config, text
|
||||||
|
import sqlalchemy as sa
|
||||||
from common.models.user import Tenant
|
from sqlalchemy.sql import schema
|
||||||
|
|
||||||
import pgvector
|
import pgvector
|
||||||
from pgvector.sqlalchemy import Vector
|
from pgvector.sqlalchemy import Vector
|
||||||
|
|
||||||
|
from common.models import document, interaction, user
|
||||||
|
|
||||||
# this is the Alembic Config object, which provides
|
# this is the Alembic Config object, which provides
|
||||||
# access to the values within the .ini file in use.
|
# access to the values within the .ini file in use.
|
||||||
@@ -46,17 +47,34 @@ def get_engine_url():
|
|||||||
config.set_main_option('sqlalchemy.url', get_engine_url())
|
config.set_main_option('sqlalchemy.url', get_engine_url())
|
||||||
target_db = current_app.extensions['migrate'].db
|
target_db = current_app.extensions['migrate'].db
|
||||||
|
|
||||||
# other values from the config, defined by the needs of env.py,
|
|
||||||
# can be acquired:
|
def get_public_table_names():
|
||||||
# my_important_option = config.get_main_option("my_important_option")
|
# TODO: This function should include the necessary functionality to automatically retrieve table names
|
||||||
# ... etc.
|
return ['role', 'roles_users', 'tenant', 'user', 'tenant_domain']
|
||||||
|
|
||||||
|
|
||||||
|
PUBLIC_TABLES = get_public_table_names()
|
||||||
|
logger.info(f"Public tables: {PUBLIC_TABLES}")
|
||||||
|
|
||||||
|
|
||||||
|
def include_object(object, name, type_, reflected, compare_to):
    """Tell Alembic whether a schema object takes part in this migration run.

    This callable is handed to ``context.configure(include_object=...)``.
    Tables whose name appears in ``PUBLIC_TABLES`` are rejected; every other
    object is accepted. Each decision is logged at INFO level.

    Args:
        object: The schema item being considered (unused here).
        name: Name of the schema item.
        type_: Kind of item as a string; only ``"table"`` is ever rejected.
        reflected: Unused here.
        compare_to: Unused here.

    Returns:
        ``False`` for a table listed in ``PUBLIC_TABLES``, ``True`` otherwise.
    """
    is_public_table = type_ == "table" and name in PUBLIC_TABLES
    if not is_public_table:
        logger.info(f"Including object: {type_} {name}")
        return True

    logger.info(f"Excluding public table: {name}")
    return False
|
||||||
|
|
||||||
|
|
||||||
# List of Tenants
|
# List of Tenants
|
||||||
tenants_ = Tenant.query.all()
|
def get_tenant_ids():
|
||||||
if tenants_:
|
from common.models.user import Tenant
|
||||||
tenants = [tenant.id for tenant in tenants_]
|
|
||||||
else:
|
tenants_ = Tenant.query.all()
|
||||||
tenants = []
|
if tenants_:
|
||||||
|
tenants = [tenant.id for tenant in tenants_]
|
||||||
|
else:
|
||||||
|
tenants = []
|
||||||
|
return tenants
|
||||||
|
|
||||||
|
|
||||||
def get_metadata():
|
def get_metadata():
|
||||||
@@ -79,7 +97,10 @@ def run_migrations_offline():
|
|||||||
"""
|
"""
|
||||||
url = config.get_main_option("sqlalchemy.url")
|
url = config.get_main_option("sqlalchemy.url")
|
||||||
context.configure(
|
context.configure(
|
||||||
url=url, target_metadata=get_metadata(), literal_binds=True
|
url=url,
|
||||||
|
target_metadata=get_metadata(),
|
||||||
|
# literal_binds=True,
|
||||||
|
include_object=include_object,
|
||||||
)
|
)
|
||||||
|
|
||||||
with context.begin_transaction():
|
with context.begin_transaction():
|
||||||
@@ -99,18 +120,8 @@ def run_migrations_online():
|
|||||||
poolclass=NullPool,
|
poolclass=NullPool,
|
||||||
)
|
)
|
||||||
|
|
||||||
def process_revision_directives(context, revision, directives):
    """Prepend a raw SQL operation to an autogenerated upgrade script.

    Does nothing unless the command was invoked with autogenerate enabled and
    the first directive carries upgrade operations.

    Args:
        context: Unused here.
        revision: Unused here.
        directives: Sequence whose first element is the migration script
            directive to modify in place.

    NOTE(review): ``IMPORT pgvector`` does not look like a statement
    PostgreSQL accepts, and ``ExecuteSQLOp`` is usually found in
    ``alembic.operations.ops`` rather than ``sa.schema`` -- confirm before
    relying on this hook.
    """
    if not config.cmd_opts.autogenerate:
        return

    migration_script = directives[0]
    if migration_script.upgrade_ops is None:
        return

    # Add import for pgvector and set search path
    search_path_op = sa.schema.ExecuteSQLOp(
        "SET search_path TO CURRENT_SCHEMA(), public; IMPORT pgvector",
        execution_options=None
    )
    migration_script.upgrade_ops.ops.insert(0, search_path_op)
|
|
||||||
|
|
||||||
with connectable.connect() as connection:
|
with connectable.connect() as connection:
|
||||||
print(tenants)
|
tenants = get_tenant_ids()
|
||||||
for tenant in tenants:
|
for tenant in tenants:
|
||||||
logger.info(f"Migrating tenant: {tenant}")
|
logger.info(f"Migrating tenant: {tenant}")
|
||||||
# set search path on the connection, which ensures that
|
# set search path on the connection, which ensures that
|
||||||
@@ -127,7 +138,8 @@ def run_migrations_online():
|
|||||||
context.configure(
|
context.configure(
|
||||||
connection=connection,
|
connection=connection,
|
||||||
target_metadata=get_metadata(),
|
target_metadata=get_metadata(),
|
||||||
process_revision_directives=process_revision_directives,
|
# literal_binds=True,
|
||||||
|
include_object=include_object,
|
||||||
)
|
)
|
||||||
|
|
||||||
with context.begin_transaction():
|
with context.begin_transaction():
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
"""Ensure logging information in document domain does not require user
|
||||||
|
|
||||||
|
Revision ID: 43eac8a7a00b
|
||||||
|
Revises: 5d5437d81041
|
||||||
|
Create Date: 2024-09-03 09:36:06.541938
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
import pgvector
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '43eac8a7a00b'
|
||||||
|
down_revision = '5d5437d81041'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Drop the NOT NULL constraint on ``document.created_by``."""
    # Manual upgrade commands
    drop_not_null_sql = 'ALTER TABLE document ALTER COLUMN created_by DROP NOT NULL'
    op.execute(drop_not_null_sql)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Restore the NOT NULL constraint on ``document.created_by``.

    NOTE(review): presumably this fails if rows with a NULL ``created_by``
    were written after the upgrade -- confirm, and backfill before running.
    """
    # Manual downgrade commands
    set_not_null_sql = 'ALTER TABLE document ALTER COLUMN created_by SET NOT NULL'
    op.execute(set_not_null_sql)
|
||||||
@@ -32,7 +32,7 @@ http {
|
|||||||
#keepalive_timeout 0;
|
#keepalive_timeout 0;
|
||||||
keepalive_timeout 65;
|
keepalive_timeout 65;
|
||||||
|
|
||||||
client_max_body_size 16M;
|
client_max_body_size 50M;
|
||||||
|
|
||||||
#gzip on;
|
#gzip on;
|
||||||
|
|
||||||
|
|||||||
30
nginx/public/chat_evie.html
Normal file
30
nginx/public/chat_evie.html
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Chat Client Evie</title>
|
||||||
|
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||||
|
<script src="https://cdn.socket.io/4.0.1/socket.io.min.js"></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||||
|
<script src="/static/js/eveai-sdk.js" defer></script>
|
||||||
|
<script src="/static/js/eveai-chat-widget.js" defer></script>
|
||||||
|
<link rel="stylesheet" href="/static/css/eveai-chat-style.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="chat-container"></div>
|
||||||
|
<script>
|
||||||
|
document.addEventListener('DOMContentLoaded', function() {
|
||||||
|
const eveAI = new EveAI(
|
||||||
|
'2',
|
||||||
|
'EveAI-CHAT-9079-8604-7441-6496-7604',
|
||||||
|
'http://macstudio.ask-eve-ai-local.com',
|
||||||
|
'en',
|
||||||
|
'en,fr,nl',
|
||||||
|
'http://macstudio.ask-eve-ai-local.com:8080/'
|
||||||
|
);
|
||||||
|
eveAI.initializeChat('chat-container');
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@@ -386,6 +386,7 @@ input[type="radio"] {
|
|||||||
.btn-danger:hover {
|
.btn-danger:hover {
|
||||||
background-color: darken(var(--bs-danger), 10%) !important; /* Darken the background on hover */
|
background-color: darken(var(--bs-danger), 10%) !important; /* Darken the background on hover */
|
||||||
border-color: darken(var(--bs-danger), 10%) !important; /* Darken the border on hover */
|
border-color: darken(var(--bs-danger), 10%) !important; /* Darken the border on hover */
|
||||||
|
color: var(--bs-white) !important; /* Ensure the text remains white and readable */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Success Alert Styling */
|
/* Success Alert Styling */
|
||||||
@@ -420,3 +421,90 @@ input[type="radio"] {
|
|||||||
box-shadow: none;
|
box-shadow: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Custom styles for chat session view */
|
||||||
|
.accordion-button:not(.collapsed) {
|
||||||
|
background-color: var(--bs-primary);
|
||||||
|
color: var(--bs-white);
|
||||||
|
}
|
||||||
|
|
||||||
|
.accordion-button:focus {
|
||||||
|
box-shadow: 0 0 0 0.25rem rgba(118, 89, 154, 0.25);
|
||||||
|
}
|
||||||
|
|
||||||
|
.interaction-question {
|
||||||
|
font-size: 1rem; /* Normal text size */
|
||||||
|
}
|
||||||
|
|
||||||
|
.interaction-icons {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.interaction-icons .material-icons {
|
||||||
|
font-size: 24px;
|
||||||
|
margin-left: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.thumb-icon.filled {
|
||||||
|
color: var(--bs-success);
|
||||||
|
}
|
||||||
|
|
||||||
|
.thumb-icon.outlined {
|
||||||
|
color: var(--thumb-icon-outlined);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Algorithm icon colors */
|
||||||
|
.algorithm-icon.rag_tenant {
|
||||||
|
color: var(--algorithm-color-rag-tenant);
|
||||||
|
}
|
||||||
|
|
||||||
|
.algorithm-icon.rag_wikipedia {
|
||||||
|
color: var(--algorithm-color-rag-wikipedia);
|
||||||
|
}
|
||||||
|
|
||||||
|
.algorithm-icon.rag_google {
|
||||||
|
color: var(--algorithm-color-rag-google);
|
||||||
|
}
|
||||||
|
|
||||||
|
.algorithm-icon.llm {
|
||||||
|
color: var(--algorithm-color-llm);
|
||||||
|
}
|
||||||
|
|
||||||
|
.accordion-body {
|
||||||
|
background-color: var(--bs-light);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Markdown content styles */
|
||||||
|
.markdown-content {
|
||||||
|
font-size: 1rem;
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
.markdown-content p {
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.markdown-content h1, .markdown-content h2, .markdown-content h3,
|
||||||
|
.markdown-content h4, .markdown-content h5, .markdown-content h6 {
|
||||||
|
margin-top: 1.5rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.markdown-content ul, .markdown-content ol {
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
padding-left: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.markdown-content code {
|
||||||
|
background-color: #f8f9fa;
|
||||||
|
padding: 0.2em 0.4em;
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.markdown-content pre {
|
||||||
|
background-color: #f8f9fa;
|
||||||
|
padding: 1rem;
|
||||||
|
border-radius: 5px;
|
||||||
|
overflow-x: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,294 +0,0 @@
|
|||||||
/* eveai_chat.css */
|
|
||||||
:root {
|
|
||||||
--user-message-bg: #292929; /* Default user message background color */
|
|
||||||
--bot-message-bg: #1e1e1e; /* Default bot message background color */
|
|
||||||
--chat-bg: #1e1e1e; /* Default chat background color */
|
|
||||||
--status-line-color: #e9e9e9; /* Color for the status line text */
|
|
||||||
--status-line-bg: #1e1e1e; /* Background color for the status line */
|
|
||||||
--status-line-height: 30px; /* Fixed height for the status line */
|
|
||||||
|
|
||||||
--algorithm-color-rag-tenant: #0f0; /* Green for RAG_TENANT */
|
|
||||||
--algorithm-color-rag-wikipedia: #00f; /* Blue for RAG_WIKIPEDIA */
|
|
||||||
--algorithm-color-rag-google: #ff0; /* Yellow for RAG_GOOGLE */
|
|
||||||
--algorithm-color-llm: #800080; /* Purple for LLM */
|
|
||||||
|
|
||||||
/*--font-family: 'Arial, sans-serif'; !* Default font family *!*/
|
|
||||||
--font-family: 'Segoe UI, Roboto, Cantarell, Noto Sans, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol';
|
|
||||||
--font-color: #e9e9e9; /* Default font color */
|
|
||||||
--user-message-font-color: #e9e9e9; /* User message font color */
|
|
||||||
--bot-message-font-color: #e9e9e9; /* Bot message font color */
|
|
||||||
--input-bg: #292929; /* Input background color */
|
|
||||||
--input-border: #ccc; /* Input border color */
|
|
||||||
--input-text-color: #e9e9e9; /* Input text color */
|
|
||||||
--button-color: #007bff; /* Button text color */
|
|
||||||
|
|
||||||
/* Variables for hyperlink backgrounds */
|
|
||||||
--link-bg: #1e1e1e; /* Default background color for hyperlinks */
|
|
||||||
--link-hover-bg: #1e1e1e; /* Background color on hover for hyperlinks */
|
|
||||||
--link-color: #dec981; /* Default text color for hyperlinks */
|
|
||||||
--link-hover-color: #D68F53; /* Text color on hover for hyperlinks */
|
|
||||||
|
|
||||||
/* New scrollbar variables */
|
|
||||||
--scrollbar-bg: #292929; /* Background color for the scrollbar track */
|
|
||||||
--scrollbar-thumb: #4b4b4b; /* Color for the scrollbar thumb */
|
|
||||||
--scrollbar-thumb-hover: #dec981; /* Color for the thumb on hover */
|
|
||||||
--scrollbar-thumb-active: #D68F53; /* Color for the thumb when active (dragged) */
|
|
||||||
|
|
||||||
/* Thumb colors */
|
|
||||||
--thumb-icon-outlined: #4b4b4b;
|
|
||||||
--thumb-icon-filled: #e9e9e9;
|
|
||||||
|
|
||||||
/* Connection Status colors */
|
|
||||||
--status-connected-color: #28a745; /* Green color for connected status */
|
|
||||||
--status-disconnected-color: #ffc107; /* Orange color for disconnected status */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Connection status styles */
|
|
||||||
.connection-status-icon {
|
|
||||||
vertical-align: middle;
|
|
||||||
font-size: 24px;
|
|
||||||
margin-right: 8px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.status-connected {
|
|
||||||
color: var(--status-connected-color);
|
|
||||||
}
|
|
||||||
|
|
||||||
.status-disconnected {
|
|
||||||
color: var(--status-disconnected-color);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Custom scrollbar styles */
|
|
||||||
.messages-area::-webkit-scrollbar {
|
|
||||||
width: 12px; /* Width of the scrollbar */
|
|
||||||
}
|
|
||||||
|
|
||||||
.messages-area::-webkit-scrollbar-track {
|
|
||||||
background: var(--scrollbar-bg); /* Background color for the track */
|
|
||||||
}
|
|
||||||
|
|
||||||
.messages-area::-webkit-scrollbar-thumb {
|
|
||||||
background-color: var(--scrollbar-thumb); /* Color of the thumb */
|
|
||||||
border-radius: 10px; /* Rounded corners for the thumb */
|
|
||||||
border: 3px solid var(--scrollbar-bg); /* Space around the thumb */
|
|
||||||
}
|
|
||||||
|
|
||||||
.messages-area::-webkit-scrollbar-thumb:hover {
|
|
||||||
background-color: var(--scrollbar-thumb-hover); /* Color when hovering over the thumb */
|
|
||||||
}
|
|
||||||
|
|
||||||
.messages-area::-webkit-scrollbar-thumb:active {
|
|
||||||
background-color: var(--scrollbar-thumb-active); /* Color when active (dragging) */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* For Firefox */
|
|
||||||
.messages-area {
|
|
||||||
scrollbar-width: thin; /* Make scrollbar thinner */
|
|
||||||
scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-bg); /* Thumb and track colors */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* General Styles */
|
|
||||||
.chat-container {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
height: 75vh;
|
|
||||||
/*max-height: 100vh;*/
|
|
||||||
max-width: 600px;
|
|
||||||
margin: auto;
|
|
||||||
border: 1px solid #ccc;
|
|
||||||
border-radius: 8px;
|
|
||||||
overflow: hidden;
|
|
||||||
background-color: var(--chat-bg);
|
|
||||||
font-family: var(--font-family); /* Apply the default font family */
|
|
||||||
color: var(--font-color); /* Apply the default font color */
|
|
||||||
}
|
|
||||||
|
|
||||||
.disclaimer {
|
|
||||||
font-size: 0.7em;
|
|
||||||
text-align: right;
|
|
||||||
padding: 5px 20px 5px 5px;
|
|
||||||
margin-bottom: 5px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.messages-area {
|
|
||||||
flex: 1;
|
|
||||||
overflow-y: auto;
|
|
||||||
padding: 10px;
|
|
||||||
background-color: var(--bot-message-bg);
|
|
||||||
}
|
|
||||||
|
|
||||||
.message {
|
|
||||||
max-width: 90%;
|
|
||||||
margin-bottom: 10px;
|
|
||||||
padding: 10px;
|
|
||||||
border-radius: 15px;
|
|
||||||
font-size: 1rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.message.user {
|
|
||||||
margin-left: auto;
|
|
||||||
background-color: var(--user-message-bg);
|
|
||||||
color: var(--user-message-font-color); /* Apply user message font color */
|
|
||||||
}
|
|
||||||
|
|
||||||
.message.bot {
|
|
||||||
background-color: var(--bot-message-bg);
|
|
||||||
color: var(--bot-message-font-color); /* Apply bot message font color */
|
|
||||||
}
|
|
||||||
|
|
||||||
.message-icons {
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Scoped styles for thumb icons */
|
|
||||||
.thumb-icon.outlined {
|
|
||||||
color: var(--thumb-icon-outlined); /* Color for outlined state */
|
|
||||||
}
|
|
||||||
|
|
||||||
.thumb-icon.filled {
|
|
||||||
color: var(--thumb-icon-filled); /* Color for filled state */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Default styles for material icons */
|
|
||||||
.material-icons {
|
|
||||||
font-size: 24px;
|
|
||||||
vertical-align: middle;
|
|
||||||
cursor: pointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
.question-area {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: row;
|
|
||||||
align-items: center;
|
|
||||||
background-color: var(--user-message-bg);
|
|
||||||
padding: 10px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.language-select-container {
|
|
||||||
width: 100%;
|
|
||||||
margin-bottom: 10px; /* Spacing between the dropdown and the textarea */
|
|
||||||
}
|
|
||||||
|
|
||||||
.language-select {
|
|
||||||
width: 100%;
|
|
||||||
margin-bottom: 5px; /* Space between the dropdown and the send button */
|
|
||||||
padding: 8px;
|
|
||||||
border-radius: 5px;
|
|
||||||
border: 1px solid var(--input-border);
|
|
||||||
background-color: var(--input-bg);
|
|
||||||
color: var(--input-text-color);
|
|
||||||
font-size: 1rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.question-area textarea {
|
|
||||||
flex: 1;
|
|
||||||
border: none;
|
|
||||||
padding: 10px;
|
|
||||||
border-radius: 15px;
|
|
||||||
background-color: var(--input-bg);
|
|
||||||
border: 1px solid var(--input-border);
|
|
||||||
color: var(--input-text-color);
|
|
||||||
font-family: var(--font-family); /* Apply the default font family */
|
|
||||||
font-size: 1rem;
|
|
||||||
resize: vertical;
|
|
||||||
min-height: 60px;
|
|
||||||
max-height: 150px;
|
|
||||||
overflow-y: auto;
|
|
||||||
margin-right: 10px; /* Space between textarea and right-side container */
|
|
||||||
}
|
|
||||||
|
|
||||||
.right-side {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
align-items: center;
|
|
||||||
}
|
|
||||||
|
|
||||||
.question-area button {
|
|
||||||
background: none;
|
|
||||||
border: none;
|
|
||||||
cursor: pointer;
|
|
||||||
color: var(--button-color);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Styles for the send icon */
|
|
||||||
.send-icon {
|
|
||||||
font-size: 24px;
|
|
||||||
color: var(--button-color);
|
|
||||||
cursor: pointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
.send-icon.disabled {
|
|
||||||
color: grey; /* Color for the disabled state */
|
|
||||||
cursor: not-allowed; /* Change cursor to indicate disabled state */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* New CSS for the status-line */
|
|
||||||
.status-line {
|
|
||||||
height: var(--status-line-height); /* Fixed height for the status line */
|
|
||||||
padding: 5px 10px;
|
|
||||||
background-color: var(--status-line-bg); /* Background color */
|
|
||||||
color: var(--status-line-color); /* Text color */
|
|
||||||
font-size: 0.9rem; /* Slightly smaller font size */
|
|
||||||
text-align: center; /* Centered text */
|
|
||||||
border-top: 1px solid #ccc; /* Subtle top border */
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
justify-content: flex-start;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Algorithm-specific colors for fingerprint icon */
|
|
||||||
.fingerprint-rag-tenant {
|
|
||||||
color: var(--algorithm-color-rag-tenant);
|
|
||||||
}
|
|
||||||
|
|
||||||
.fingerprint-rag-wikipedia {
|
|
||||||
color: var(--algorithm-color-rag-wikipedia);
|
|
||||||
}
|
|
||||||
|
|
||||||
.fingerprint-rag-google {
|
|
||||||
color: var(--algorithm-color-rag-google);
|
|
||||||
}
|
|
||||||
|
|
||||||
.fingerprint-llm {
|
|
||||||
color: var(--algorithm-color-llm);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Styling for citation links */
|
|
||||||
.citations a {
|
|
||||||
background-color: var(--link-bg); /* Apply default background color */
|
|
||||||
color: var(--link-color); /* Apply default link color */
|
|
||||||
padding: 2px 4px; /* Add padding for better appearance */
|
|
||||||
border-radius: 3px; /* Add slight rounding for a modern look */
|
|
||||||
text-decoration: none; /* Remove default underline */
|
|
||||||
transition: background-color 0.3s, color 0.3s; /* Smooth transition for hover effects */
|
|
||||||
}
|
|
||||||
|
|
||||||
.citations a:hover {
|
|
||||||
background-color: var(--link-hover-bg); /* Background color on hover */
|
|
||||||
color: var(--link-hover-color); /* Text color on hover */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Media queries for responsiveness */
|
|
||||||
@media (max-width: 768px) {
|
|
||||||
.chat-container {
|
|
||||||
max-width: 90%; /* Reduce max width on smaller screens */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@media (max-width: 480px) {
|
|
||||||
.chat-container {
|
|
||||||
max-width: 95%; /* Further reduce max width on very small screens */
|
|
||||||
}
|
|
||||||
|
|
||||||
.question-area input {
|
|
||||||
font-size: 0.9rem; /* Adjust input font size for smaller screens */
|
|
||||||
}
|
|
||||||
|
|
||||||
.status-line {
|
|
||||||
font-size: 0.8rem; /* Adjust status line font size for smaller screens */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,470 +0,0 @@
|
|||||||
class EveAIChatWidget extends HTMLElement {
|
|
||||||
static get observedAttributes() {
|
|
||||||
return ['tenant-id', 'api-key', 'domain', 'language', 'languages'];
|
|
||||||
}
|
|
||||||
|
|
||||||
constructor() {
|
|
||||||
super();
|
|
||||||
this.socket = null; // Initialize socket to null
|
|
||||||
this.attributesSet = false; // Flag to check if all attributes are set
|
|
||||||
this.jwtToken = null; // Initialize jwtToken to null
|
|
||||||
this.userTimezone = Intl.DateTimeFormat().resolvedOptions().timeZone; // Detect user's timezone
|
|
||||||
this.heartbeatInterval = null;
|
|
||||||
this.idleTime = 0; // in milliseconds
|
|
||||||
this.maxConnectionIdleTime = 1 * 60 * 60 * 1000; // 1 hours in milliseconds
|
|
||||||
this.languages = []
|
|
||||||
this.room = null;
|
|
||||||
console.log('EveAIChatWidget constructor called');
|
|
||||||
}
|
|
||||||
|
|
||||||
connectedCallback() {
|
|
||||||
console.log('connectedCallback called');
|
|
||||||
this.innerHTML = this.getTemplate();
|
|
||||||
this.messagesArea = this.querySelector('.messages-area');
|
|
||||||
this.questionInput = this.querySelector('.question-area textarea');
|
|
||||||
this.sendButton = this.querySelector('.send-icon');
|
|
||||||
this.languageSelect = this.querySelector('.language-select');
|
|
||||||
this.statusLine = this.querySelector('.status-line');
|
|
||||||
this.statusMessage = this.querySelector('.status-message');
|
|
||||||
this.connectionStatusIcon = this.querySelector('.connection-status-icon');
|
|
||||||
|
|
||||||
this.sendButton.addEventListener('click', () => this.handleSendMessage());
|
|
||||||
this.questionInput.addEventListener('keydown', (event) => {
|
|
||||||
if (event.key === 'Enter' && !event.shiftKey) {
|
|
||||||
event.preventDefault(); // Prevent adding a new line
|
|
||||||
this.handleSendMessage();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if (this.areAllAttributesSet() && !this.socket) {
|
|
||||||
console.log('Attributes already set in connectedCallback, initializing socket');
|
|
||||||
this.initializeSocket();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
populateLanguageDropdown() {
|
|
||||||
// Clear existing options
|
|
||||||
this.languageSelect.innerHTML = '';
|
|
||||||
console.log(`languages for options: ${this.languages}`)
|
|
||||||
|
|
||||||
// Populate with new options
|
|
||||||
this.languages.forEach(lang => {
|
|
||||||
const option = document.createElement('option');
|
|
||||||
option.value = lang;
|
|
||||||
option.textContent = lang.toUpperCase();
|
|
||||||
if (lang === this.currentLanguage) {
|
|
||||||
option.selected = true;
|
|
||||||
}
|
|
||||||
console.log(`Adding option for language: ${lang}`)
|
|
||||||
this.languageSelect.appendChild(option);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Add event listener for language change
|
|
||||||
this.languageSelect.addEventListener('change', (e) => {
|
|
||||||
this.currentLanguage = e.target.value;
|
|
||||||
// You might want to emit an event or update the backend about the language change
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
attributeChangedCallback(name, oldValue, newValue) {
|
|
||||||
console.log(`attributeChangedCallback called: ${name} changed from ${oldValue} to ${newValue}`);
|
|
||||||
this.updateAttributes();
|
|
||||||
|
|
||||||
if (this.areAllAttributesSet() && !this.socket) {
|
|
||||||
console.log('All attributes set in attributeChangedCallback, initializing socket');
|
|
||||||
this.attributesSet = true;
|
|
||||||
console.log('All attributes are set, populating language dropdown');
|
|
||||||
this.populateLanguageDropdown();
|
|
||||||
console.log('All attributes are set, initializing socket')
|
|
||||||
this.initializeSocket();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
updateAttributes() {
|
|
||||||
this.tenantId = this.getAttribute('tenant-id');
|
|
||||||
this.apiKey = this.getAttribute('api-key');
|
|
||||||
this.domain = this.getAttribute('domain');
|
|
||||||
this.language = this.getAttribute('language');
|
|
||||||
const languageAttr = this.getAttribute('languages');
|
|
||||||
this.languages = languageAttr ? languageAttr.split(',') : [];
|
|
||||||
this.currentLanguage = this.language;
|
|
||||||
console.log('Updated attributes:', {
|
|
||||||
tenantId: this.tenantId,
|
|
||||||
apiKey: this.apiKey,
|
|
||||||
domain: this.domain,
|
|
||||||
language: this.language,
|
|
||||||
currentLanguage: this.currentLanguage,
|
|
||||||
languages: this.languages
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
areAllAttributesSet() {
|
|
||||||
const tenantId = this.getAttribute('tenant-id');
|
|
||||||
const apiKey = this.getAttribute('api-key');
|
|
||||||
const domain = this.getAttribute('domain');
|
|
||||||
const language = this.getAttribute('language');
|
|
||||||
const languages = this.getAttribute('languages');
|
|
||||||
console.log('Checking if all attributes are set:', {
|
|
||||||
tenantId,
|
|
||||||
apiKey,
|
|
||||||
domain,
|
|
||||||
language,
|
|
||||||
languages
|
|
||||||
});
|
|
||||||
return tenantId && apiKey && domain && language && languages;
|
|
||||||
}
|
|
||||||
|
|
||||||
createLanguageDropdown() {
|
|
||||||
const select = document.createElement('select');
|
|
||||||
select.id = 'languageSelect';
|
|
||||||
this.languages.forEach(lang => {
|
|
||||||
const option = document.createElement('option');
|
|
||||||
option.value = lang;
|
|
||||||
option.textContent = lang.toUpperCase();
|
|
||||||
if (lang === this.currentLanguage) {
|
|
||||||
option.selected = true;
|
|
||||||
}
|
|
||||||
select.appendChild(option);
|
|
||||||
});
|
|
||||||
select.addEventListener('change', (e) => {
|
|
||||||
this.currentLanguage = e.target.value;
|
|
||||||
// You might want to emit an event or update the backend about the language change
|
|
||||||
});
|
|
||||||
return select;
|
|
||||||
}
|
|
||||||
|
|
||||||
initializeSocket() {
|
|
||||||
if (this.socket) {
|
|
||||||
console.log('Socket already initialized');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`Initializing socket connection to Evie`);
|
|
||||||
|
|
||||||
// Ensure apiKey is passed in the query parameters
|
|
||||||
this.socket = io('https://evie.askeveai.com', {
|
|
||||||
path: '/chat/socket.io/',
|
|
||||||
transports: ['websocket', 'polling'],
|
|
||||||
query: {
|
|
||||||
tenantId: this.tenantId,
|
|
||||||
apiKey: this.apiKey // Ensure apiKey is included here
|
|
||||||
},
|
|
||||||
auth: {
|
|
||||||
token: 'Bearer ' + this.apiKey // Ensure token is included here
|
|
||||||
},
|
|
||||||
reconnectionAttempts: Infinity, // Infinite reconnection attempts
|
|
||||||
reconnectionDelay: 5000, // Delay between reconnections
|
|
||||||
timeout: 20000 // Connection timeout
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`Finished initializing socket connection to Evie`);
|
|
||||||
|
|
||||||
this.socket.on('connect', (data) => {
|
|
||||||
console.log('Socket connected OK');
|
|
||||||
this.setStatusMessage('Connected to EveAI.');
|
|
||||||
this.updateConnectionStatus(true);
|
|
||||||
this.startHeartbeat();
|
|
||||||
if (data.room) {
|
|
||||||
this.room = data.room;
|
|
||||||
console.log(`Joined room: ${this.room}`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
this.socket.on('authenticated', (data) => {
|
|
||||||
console.log('Authenticated event received: ', data);
|
|
||||||
this.setStatusMessage('Authenticated.');
|
|
||||||
if (data.token) {
|
|
||||||
this.jwtToken = data.token; // Store the JWT token received from the server
|
|
||||||
}
|
|
||||||
if (data.room) {
|
|
||||||
this.room = data.room;
|
|
||||||
console.log(`Confirmed room: ${this.room}`);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
this.socket.on('connect_error', (err) => {
|
|
||||||
console.error('Socket connection error:', err);
|
|
||||||
this.setStatusMessage('EveAI Chat Widget needs further configuration by site administrator.');
|
|
||||||
this.updateConnectionStatus(false);
|
|
||||||
});
|
|
||||||
|
|
||||||
this.socket.on('connect_timeout', () => {
|
|
||||||
console.error('Socket connection timeout');
|
|
||||||
this.setStatusMessage('EveAI Chat Widget needs further configuration by site administrator.');
|
|
||||||
this.updateConnectionStatus(false);
|
|
||||||
});
|
|
||||||
|
|
||||||
this.socket.on('disconnect', (reason) => {
|
|
||||||
console.log('Socket disconnected: ', reason);
|
|
||||||
if (reason === 'io server disconnect') {
|
|
||||||
// Server disconnected the socket
|
|
||||||
this.socket.connect(); // Attempt to reconnect
|
|
||||||
}
|
|
||||||
this.setStatusMessage('Disconnected from EveAI. Please refresh the page for further interaction.');
|
|
||||||
this.updateConnectionStatus(false);
|
|
||||||
this.stopHeartbeat();
|
|
||||||
});
|
|
||||||
|
|
||||||
this.socket.on('reconnect_attempt', () => {
|
|
||||||
console.log('Attempting to reconnect to the server...');
|
|
||||||
this.setStatusMessage('Attempting to reconnect...');
|
|
||||||
});
|
|
||||||
|
|
||||||
this.socket.on('reconnect', () => {
|
|
||||||
console.log('Successfully reconnected to the server');
|
|
||||||
this.setStatusMessage('Reconnected to EveAI.');
|
|
||||||
this.updateConnectionStatus(true);
|
|
||||||
this.startHeartbeat();
|
|
||||||
});
|
|
||||||
|
|
||||||
this.socket.on('bot_response', (data) => {
|
|
||||||
if (data.tenantId === this.tenantId) {
|
|
||||||
console.log('Initial response received:', data);
|
|
||||||
console.log('Task ID received:', data.taskId);
|
|
||||||
this.checkTaskStatus(data.taskId);
|
|
||||||
this.setStatusMessage('Processing...');
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
this.socket.on('task_status', (data) => {
|
|
||||||
console.log('Task status received:', data.status);
|
|
||||||
console.log('Task ID received:', data.taskId);
|
|
||||||
console.log('Citations type:', typeof data.citations, 'Citations:', data.citations);
|
|
||||||
|
|
||||||
if (data.status === 'pending') {
|
|
||||||
this.updateProgress();
|
|
||||||
setTimeout(() => this.checkTaskStatus(data.taskId), 1000); // Poll every second
|
|
||||||
} else if (data.status === 'success') {
|
|
||||||
this.addBotMessage(data.answer, data.interaction_id, data.algorithm, data.citations);
|
|
||||||
this.clearProgress(); // Clear progress indicator when done
|
|
||||||
} else {
|
|
||||||
this.setStatusMessage('Failed to process message.');
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
setStatusMessage(message) {
|
|
||||||
this.statusMessage.textContent = message;
|
|
||||||
}
|
|
||||||
|
|
||||||
updateConnectionStatus(isConnected) {
|
|
||||||
if (isConnected) {
|
|
||||||
this.connectionStatusIcon.textContent = 'link';
|
|
||||||
this.connectionStatusIcon.classList.remove('status-disconnected');
|
|
||||||
this.connectionStatusIcon.classList.add('status-connected');
|
|
||||||
} else {
|
|
||||||
this.connectionStatusIcon.textContent = 'link_off';
|
|
||||||
this.connectionStatusIcon.classList.remove('status-connected');
|
|
||||||
this.connectionStatusIcon.classList.add('status-disconnected');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
startHeartbeat() {
|
|
||||||
this.stopHeartbeat(); // Clear any existing interval
|
|
||||||
this.heartbeatInterval = setInterval(() => {
|
|
||||||
if (this.socket && this.socket.connected) {
|
|
||||||
this.socket.emit('heartbeat');
|
|
||||||
this.idleTime += 30000;
|
|
||||||
if (this.idleTime >= this.maxConnectionIdleTime) {
|
|
||||||
this.socket.disconnect();
|
|
||||||
this.setStatusMessage('Disconnected due to inactivity.');
|
|
||||||
this.updateConnectionStatus(false);
|
|
||||||
this.stopHeartbeat();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}, 30000); // Send a heartbeat every 30 seconds
|
|
||||||
}
|
|
||||||
|
|
||||||
stopHeartbeat() {
|
|
||||||
if (this.heartbeatInterval) {
|
|
||||||
clearInterval(this.heartbeatInterval);
|
|
||||||
this.heartbeatInterval = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
updateProgress() {
|
|
||||||
if (!this.statusMessage.textContent) {
|
|
||||||
this.statusMessage.textContent = 'Processing...';
|
|
||||||
} else {
|
|
||||||
this.statusMessage.textContent += '.'; // Append a dot
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
clearProgress() {
|
|
||||||
this.statusMessage.textContent = '';
|
|
||||||
this.toggleSendButton(false); // Re-enable and revert send button to outlined version
|
|
||||||
}
|
|
||||||
|
|
||||||
checkTaskStatus(taskId) {
|
|
||||||
this.updateProgress();
|
|
||||||
this.socket.emit('check_task_status', { task_id: taskId });
|
|
||||||
}
|
|
||||||
|
|
||||||
getTemplate() {
|
|
||||||
return `
|
|
||||||
<div class="chat-container">
|
|
||||||
<div class="messages-area"></div>
|
|
||||||
<div class="disclaimer">Evie can make mistakes. Please double-check responses.</div>
|
|
||||||
<div class="question-area">
|
|
||||||
<textarea placeholder="Type your message here..." rows="3"></textarea>
|
|
||||||
<div class="right-side">
|
|
||||||
<select class="language-select"></select>
|
|
||||||
<i class="material-icons send-icon outlined">send</i>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="status-line">
|
|
||||||
<i class="material-icons connection-status-icon">link_off</i>
|
|
||||||
<span class="status-message"></span>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
`;
|
|
||||||
}
|
|
||||||
|
|
||||||
addUserMessage(text) {
|
|
||||||
const message = document.createElement('div');
|
|
||||||
message.classList.add('message', 'user');
|
|
||||||
message.innerHTML = `<p>${text}</p>`;
|
|
||||||
this.messagesArea.appendChild(message);
|
|
||||||
this.messagesArea.scrollTop = this.messagesArea.scrollHeight;
|
|
||||||
}
|
|
||||||
|
|
||||||
handleFeedback(feedback, interactionId) {
|
|
||||||
// Send feedback to the backend
|
|
||||||
console.log('handleFeedback called');
|
|
||||||
if (!this.socket) {
|
|
||||||
console.error('Socket is not initialized');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!this.jwtToken) {
|
|
||||||
console.error('JWT token is not available');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
console.log('Sending message to backend');
|
|
||||||
console.log(`Feedback for ${interactionId}: ${feedback}`);
|
|
||||||
this.socket.emit('feedback', { tenantId: this.tenantId, token: this.jwtToken, feedback, interactionId });
|
|
||||||
this.setStatusMessage('Feedback sent.');
|
|
||||||
}
|
|
||||||
|
|
||||||
addBotMessage(text, interactionId, algorithm = 'default', citations = []) {
|
|
||||||
const message = document.createElement('div');
|
|
||||||
message.classList.add('message', 'bot');
|
|
||||||
|
|
||||||
let content = marked.parse(text);
|
|
||||||
let citationsHtml = citations.map(url => `<a href="${url}" target="_blank">${url}</a>`).join('<br>');
|
|
||||||
|
|
||||||
let algorithmClass;
|
|
||||||
switch (algorithm) {
|
|
||||||
case 'RAG_TENANT':
|
|
||||||
algorithmClass = 'fingerprint-rag-tenant';
|
|
||||||
break;
|
|
||||||
case 'RAG_WIKIPEDIA':
|
|
||||||
algorithmClass = 'fingerprint-rag-wikipedia';
|
|
||||||
break;
|
|
||||||
case 'RAG_GOOGLE':
|
|
||||||
algorithmClass = 'fingerprint-rag-google';
|
|
||||||
break;
|
|
||||||
case 'LLM':
|
|
||||||
algorithmClass = 'fingerprint-llm';
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
algorithmClass = '';
|
|
||||||
}
|
|
||||||
|
|
||||||
message.innerHTML = `
|
|
||||||
<p>${content}</p>
|
|
||||||
${citationsHtml ? `<p class="citations">${citationsHtml}</p>` : ''}
|
|
||||||
<div class="message-icons">
|
|
||||||
<i class="material-icons ${algorithmClass}">fingerprint</i>
|
|
||||||
<i class="material-icons thumb-icon outlined" data-feedback="up" data-interaction-id="${interactionId}">thumb_up_off_alt</i>
|
|
||||||
<i class="material-icons thumb-icon outlined" data-feedback="down" data-interaction-id="${interactionId}">thumb_down_off_alt</i>
|
|
||||||
</div>
|
|
||||||
`;
|
|
||||||
this.messagesArea.appendChild(message);
|
|
||||||
|
|
||||||
// Add event listeners for feedback buttons
|
|
||||||
const thumbsUp = message.querySelector('i[data-feedback="up"]');
|
|
||||||
const thumbsDown = message.querySelector('i[data-feedback="down"]');
|
|
||||||
thumbsUp.addEventListener('click', () => this.toggleFeedback(thumbsUp, thumbsDown, 'up', interactionId));
|
|
||||||
thumbsDown.addEventListener('click', () => this.toggleFeedback(thumbsUp, thumbsDown, 'down', interactionId));
|
|
||||||
|
|
||||||
this.messagesArea.scrollTop = this.messagesArea.scrollHeight;
|
|
||||||
}
|
|
||||||
|
|
||||||
toggleFeedback(thumbsUp, thumbsDown, feedback, interactionId) {
|
|
||||||
console.log('feedback called');
|
|
||||||
this.idleTime = 0; // Reset idle time
|
|
||||||
if (feedback === 'up') {
|
|
||||||
thumbsUp.textContent = 'thumb_up'; // Change to filled icon
|
|
||||||
thumbsUp.classList.remove('outlined');
|
|
||||||
thumbsUp.classList.add('filled');
|
|
||||||
thumbsDown.textContent = 'thumb_down_off_alt'; // Keep the other icon outlined
|
|
||||||
thumbsDown.classList.add('outlined');
|
|
||||||
thumbsDown.classList.remove('filled');
|
|
||||||
} else {
|
|
||||||
thumbsDown.textContent = 'thumb_down'; // Change to filled icon
|
|
||||||
thumbsDown.classList.remove('outlined');
|
|
||||||
thumbsDown.classList.add('filled');
|
|
||||||
thumbsUp.textContent = 'thumb_up_off_alt'; // Keep the other icon outlined
|
|
||||||
thumbsUp.classList.add('outlined');
|
|
||||||
thumbsUp.classList.remove('filled');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send feedback to the backend
|
|
||||||
this.handleFeedback(feedback, interactionId);
|
|
||||||
}
|
|
||||||
|
|
||||||
handleSendMessage() {
|
|
||||||
console.log('handleSendMessage called');
|
|
||||||
this.idleTime = 0; // Reset idle time
|
|
||||||
const message = this.questionInput.value.trim();
|
|
||||||
if (message) {
|
|
||||||
this.addUserMessage(message);
|
|
||||||
this.questionInput.value = '';
|
|
||||||
this.sendMessageToBackend(message);
|
|
||||||
this.toggleSendButton(true); // Disable and change send button to filled version
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sendMessageToBackend(message) {
|
|
||||||
console.log('sendMessageToBackend called');
|
|
||||||
if (!this.socket) {
|
|
||||||
console.error('Socket is not initialized');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!this.jwtToken) {
|
|
||||||
console.error('JWT token is not available');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const selectedLanguage = this.languageSelect.value;
|
|
||||||
|
|
||||||
console.log('Sending message to backend');
|
|
||||||
this.socket.emit('user_message', {
|
|
||||||
tenantId: this.tenantId,
|
|
||||||
token: this.jwtToken,
|
|
||||||
message,
|
|
||||||
language: selectedLanguage,
|
|
||||||
timezone: this.userTimezone
|
|
||||||
});
|
|
||||||
this.setStatusMessage('Processing started ...')
|
|
||||||
}
|
|
||||||
|
|
||||||
toggleSendButton(isProcessing) {
|
|
||||||
if (isProcessing) {
|
|
||||||
this.sendButton.textContent = 'send'; // Filled send icon
|
|
||||||
this.sendButton.classList.remove('outlined');
|
|
||||||
this.sendButton.classList.add('filled');
|
|
||||||
this.sendButton.classList.add('disabled'); // Add disabled class for styling
|
|
||||||
this.sendButton.style.pointerEvents = 'none'; // Disable click events
|
|
||||||
} else {
|
|
||||||
this.sendButton.textContent = 'send'; // Outlined send icon
|
|
||||||
this.sendButton.classList.add('outlined');
|
|
||||||
this.sendButton.classList.remove('filled');
|
|
||||||
this.sendButton.classList.remove('disabled'); // Remove disabled class
|
|
||||||
this.sendButton.style.pointerEvents = 'auto'; // Re-enable click events
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register the widget only once: customElements.define throws when the tag
// name is already registered, which happens if this script is loaded twice.
if (!customElements.get('eveai-chat-widget')) {
    customElements.define('eveai-chat-widget', EveAIChatWidget);
}
|
|
||||||
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
// static/js/eveai-sdk.js
|
|
||||||
/**
 * Small bootstrap helper that mounts an <eveai-chat-widget> into a page
 * element and passes the tenant configuration to it through attributes.
 */
class EveAI {
    /**
     * @param {*} tenantId  Tenant identifier, set as the `tenant-id` attribute.
     * @param {*} apiKey    API key, set as the `api-key` attribute.
     * @param {*} domain    Value for the `domain` attribute.
     * @param {*} language  Value for the `language` attribute.
     * @param {*} languages Value for the `languages` attribute.
     */
    constructor(tenantId, apiKey, domain, language, languages) {
        this.tenantId = tenantId;
        this.apiKey = apiKey;
        this.domain = domain;
        this.language = language;
        this.languages = languages;

        // The API key is a credential and is deliberately kept out of the log.
        console.log('EveAI constructor:', { tenantId, domain, language, languages });
    }

    /**
     * Insert the chat widget into the element with the given id and, once
     * the custom element is defined, configure it via attributes.
     *
     * Logs 'Container not found' and does nothing else when no element has
     * that id.
     *
     * @param {string} containerId Id of the host element.
     */
    initializeChat(containerId) {
        const container = document.getElementById(containerId);
        if (!container) {
            console.error('Container not found');
            return;
        }

        container.innerHTML = '<eveai-chat-widget></eveai-chat-widget>';

        // Attributes are set only after the element class is registered.
        customElements.whenDefined('eveai-chat-widget').then(() => {
            const chatWidget = container.querySelector('eveai-chat-widget');
            chatWidget.setAttribute('tenant-id', this.tenantId);
            chatWidget.setAttribute('api-key', this.apiKey);
            chatWidget.setAttribute('domain', this.domain);
            chatWidget.setAttribute('language', this.language);
            chatWidget.setAttribute('languages', this.languages);
        });
    }
}
|
|
||||||
@@ -43,7 +43,6 @@ pgvector~=0.2.5
|
|||||||
pycryptodome~=3.20.0
|
pycryptodome~=3.20.0
|
||||||
pydantic~=2.7.4
|
pydantic~=2.7.4
|
||||||
PyJWT~=2.8.0
|
PyJWT~=2.8.0
|
||||||
pypdf~=4.2.0
|
|
||||||
PySocks~=1.7.1
|
PySocks~=1.7.1
|
||||||
python-dateutil~=2.9.0.post0
|
python-dateutil~=2.9.0.post0
|
||||||
python-engineio~=4.9.1
|
python-engineio~=4.9.1
|
||||||
@@ -61,16 +60,17 @@ urllib3~=2.2.2
|
|||||||
WTForms~=3.1.2
|
WTForms~=3.1.2
|
||||||
wtforms-html5~=0.6.1
|
wtforms-html5~=0.6.1
|
||||||
zxcvbn~=4.4.28
|
zxcvbn~=4.4.28
|
||||||
|
|
||||||
pytube~=15.0.0
|
|
||||||
PyPDF2~=3.0.1
|
|
||||||
groq~=0.9.0
|
groq~=0.9.0
|
||||||
pydub~=0.25.1
|
pydub~=0.25.1
|
||||||
argparse~=1.4.0
|
argparse~=1.4.0
|
||||||
portkey_ai~=1.7.0
|
portkey_ai~=1.8.2
|
||||||
|
|
||||||
minio~=7.2.7
|
minio~=7.2.7
|
||||||
Werkzeug~=3.0.3
|
Werkzeug~=3.0.3
|
||||||
itsdangerous~=2.2.0
|
itsdangerous~=2.2.0
|
||||||
cryptography~=43.0.0
|
cryptography~=43.0.0
|
||||||
graypy~=2.1.0
|
graypy~=2.1.0
|
||||||
|
lxml~=5.3.0
|
||||||
|
pillow~=10.4.0
|
||||||
|
pdfplumber~=0.11.4
|
||||||
|
PyPDF2~=3.0.1
|
||||||
|
Flask-RESTful~=0.3.10
|
||||||
Reference in New Issue
Block a user