Compare commits
7 Commits
v1.0.3-alf
...
v1.0.6-alf
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1fa33c029b | ||
|
|
bcf7d439f3 | ||
|
|
b9acf4d2ae | ||
|
|
ae7bf3dbae | ||
|
|
914c265afe | ||
|
|
a158655247 | ||
|
|
bc350af247 |
80
CHANGELOG.md
Normal file
80
CHANGELOG.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to EveAI will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- For new features.
|
||||
|
||||
### Changed
|
||||
- For changes in existing functionality.
|
||||
|
||||
### Deprecated
|
||||
- For soon-to-be removed features.
|
||||
|
||||
### Removed
|
||||
- For now removed features.
|
||||
|
||||
### Fixed
|
||||
- For any bug fixes.
|
||||
|
||||
### Security
|
||||
- In case of vulnerabilities.
|
||||
|
||||
## [1.0.5-alfa] - 2024-09-02
|
||||
|
||||
### Added
|
||||
- Allow chatwidget to connect to multiple servers (e.g. development and production)
|
||||
- Start implementation of API
|
||||
- Add API-key functionality to tenants
|
||||
- Deduplication of API and Document view code
|
||||
- Allow URL addition to accept all types of files, not just HTML
|
||||
- Allow new file types upload: srt, mp3, ogg, mp4
|
||||
- Improve processing of different file types using Processor classes
|
||||
|
||||
### Removed
|
||||
- Removed direct upload of YouTube URLs, due to continuous changes to the YouTube website
|
||||
|
||||
## [1.0.4-alfa] - 2024-08-27
|
||||
Skipped
|
||||
|
||||
## [1.0.3-alfa] - 2024-08-27
|
||||
|
||||
### Added
|
||||
- Refinement of HTML processing - allow for excluded classes and elements.
|
||||
- Allow for multiple instances of Evie on one website (pure + WordPress plugin)
|
||||
|
||||
### Changed
|
||||
- PDF Processing extracted in new PDF Processor class.
|
||||
- Allow for longer and more complex PDFs to be uploaded.
|
||||
|
||||
## [1.0.2-alfa] - 2024-08-22
|
||||
|
||||
### Fixed
|
||||
- Bugfix for ResetPasswordForm in config.py
|
||||
|
||||
## [1.0.1-alfa] - 2024-08-21
|
||||
|
||||
### Added
|
||||
- Full Document Version Overview
|
||||
|
||||
### Changed
|
||||
- Improvements to user creation and registration, renewal of passwords, ...
|
||||
|
||||
## [1.0.0-alfa] - 2024-08-16
|
||||
|
||||
### Added
|
||||
- Initial release of the project.
|
||||
|
||||
### Changed
|
||||
- None
|
||||
|
||||
### Fixed
|
||||
- None
|
||||
|
||||
[Unreleased]: https://github.com/username/repo/compare/v1.0.0...HEAD
|
||||
[1.0.0]: https://github.com/username/repo/releases/tag/v1.0.0
|
||||
@@ -9,6 +9,7 @@ from flask_socketio import SocketIO
|
||||
from flask_jwt_extended import JWTManager
|
||||
from flask_session import Session
|
||||
from flask_wtf import CSRFProtect
|
||||
from flask_restful import Api
|
||||
|
||||
from .utils.nginx_utils import prefixed_url_for
|
||||
from .utils.simple_encryption import SimpleEncryption
|
||||
@@ -27,6 +28,7 @@ cors = CORS()
|
||||
socketio = SocketIO()
|
||||
jwt = JWTManager()
|
||||
session = Session()
|
||||
api = Api()
|
||||
|
||||
# kms_client = JosKMSClient.from_service_account_json('config/gc_sa_eveai.json')
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ class Document(db.Model):
|
||||
|
||||
# Versioning Information
|
||||
created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
|
||||
created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
|
||||
created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
|
||||
updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
|
||||
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
|
||||
|
||||
|
||||
@@ -54,6 +54,8 @@ class Tenant(db.Model):
|
||||
license_end_date = db.Column(db.Date, nullable=True)
|
||||
allowed_monthly_interactions = db.Column(db.Integer, nullable=True)
|
||||
encrypted_chat_api_key = db.Column(db.String(500), nullable=True)
|
||||
encrypted_api_key = db.Column(db.String(500), nullable=True)
|
||||
|
||||
|
||||
# Tuning enablers
|
||||
embed_tuning = db.Column(db.Boolean, nullable=True, default=False)
|
||||
|
||||
230
common/utils/document_utils.py
Normal file
230
common/utils/document_utils.py
Normal file
@@ -0,0 +1,230 @@
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from werkzeug.utils import secure_filename
|
||||
from common.models.document import Document, DocumentVersion
|
||||
from common.extensions import db, minio_client
|
||||
from common.utils.celery_utils import current_celery
|
||||
from flask import current_app
|
||||
import requests
|
||||
from urllib.parse import urlparse, unquote
|
||||
import os
|
||||
from .eveai_exceptions import EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType, \
|
||||
EveAIYoutubeError
|
||||
|
||||
|
||||
def create_document_stack(api_input, file, filename, extension, tenant_id):
    """Create a Document with its first DocumentVersion and store the file.

    The document and version are committed first; the file is uploaded
    afterwards through ``upload_file_for_version``.

    Args:
        api_input: Mapping with the submitted metadata. ``url``, ``language``
            and ``user_context`` are read here; ``create_document`` reads the rest.
        file: File payload handed on unchanged to ``upload_file_for_version``.
        filename: Name of the file, used to derive a default document name.
        extension: File extension, stored as the version's file type.
        tenant_id: Identifier of the tenant that owns the document.

    Returns:
        Tuple ``(new_doc, new_doc_vers)``.

    Raises:
        SQLAlchemyError: Re-raised after rollback when the commit fails.
    """
    # Create the Document
    new_doc = create_document(api_input, filename, tenant_id)
    db.session.add(new_doc)

    # Create the DocumentVersion
    new_doc_vers = create_version_for_document(
        new_doc,
        api_input.get('url', ''),
        api_input.get('language', 'en'),
        api_input.get('user_context', ''),
    )
    db.session.add(new_doc_vers)

    try:
        db.session.commit()
    except SQLAlchemyError as e:
        current_app.logger.error(f'Error adding document for tenant {tenant_id}: {e}')
        db.session.rollback()
        raise

    # Log both ids: the previous message printed the document id under the
    # label "Document Version".
    current_app.logger.info(f'Document added successfully for tenant {tenant_id}, '
                            f'Document {new_doc.id}, Document Version {new_doc_vers.id}')

    # Upload file to storage
    upload_file_for_version(new_doc_vers, file, extension, tenant_id)

    return new_doc, new_doc_vers
|
||||
|
||||
|
||||
def create_document(form, filename, tenant_id):
    """Build a new, not yet persisted Document from submitted metadata.

    Args:
        form: Mapping with optional ``name`` and ``valid_from`` entries.
        filename: File name; its stem is the fallback document name.
        tenant_id: Identifier of the owning tenant.

    Returns:
        The populated ``Document`` instance (not added to the session).
    """
    def _field(key):
        # Optional API arguments may be absent altogether or present as None/''.
        try:
            return form[key]
        except KeyError:
            return None

    new_doc = Document()

    # Fall back to the file stem for any empty name, not only for ''.
    new_doc.name = _field('name') or filename.rsplit('.', 1)[0]

    # Default the validity start to "now" (UTC) when none was supplied.
    new_doc.valid_from = _field('valid_from') or dt.now(tz.utc)

    new_doc.tenant_id = tenant_id
    set_logging_information(new_doc, dt.now(tz.utc))

    return new_doc
|
||||
|
||||
|
||||
def create_version_for_document(document, url, language, user_context):
    """Build a new, not yet persisted DocumentVersion for ``document``.

    Args:
        document: The Document the version belongs to.
        url: Source URL; ignored when empty or None.
        language: Language of the version; required.
        user_context: Optional context text; ignored when empty or None.

    Returns:
        The populated ``DocumentVersion`` instance (not added to the session).

    Raises:
        EveAIInvalidLanguageException: If ``language`` is empty or None.
    """
    new_doc_vers = DocumentVersion()

    if url:
        new_doc_vers.url = url

    # None must be rejected as well: optional API arguments arrive as None,
    # which the former `== ''` comparison let through.
    if not language:
        raise EveAIInvalidLanguageException('Language is required for document creation!')
    new_doc_vers.language = language

    if user_context:
        new_doc_vers.user_context = user_context

    new_doc_vers.document = document

    set_logging_information(new_doc_vers, dt.now(tz.utc))

    return new_doc_vers
|
||||
|
||||
|
||||
def upload_file_for_version(doc_vers, file, extension, tenant_id):
    """Store ``file`` in MinIO for ``doc_vers`` and commit the file metadata.

    Sets the version's file type, file name and file location before
    uploading. On any failure the session is rolled back, the error is logged
    and the exception is re-raised.
    """
    doc_vers.file_type = extension
    doc_vers.file_name = doc_vers.calc_file_name()
    doc_vers.file_location = doc_vers.calc_file_location()

    # Normally, the tenant bucket should exist. But let's be on the safe side if a migration took place.
    minio_client.create_tenant_bucket(tenant_id)

    log = current_app.logger
    try:
        upload_args = (
            tenant_id,
            doc_vers.doc_id,
            doc_vers.language,
            doc_vers.id,
            doc_vers.file_name,
            file,
        )
        minio_client.upload_document_file(*upload_args)
        db.session.commit()
        log.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
                 f'document version {doc_vers.id} while uploading file.')
    except Exception as exc:
        db.session.rollback()
        log.error(f'Error saving document to MinIO for tenant {tenant_id}: {exc}')
        raise
|
||||
|
||||
|
||||
def set_logging_information(obj, timestamp):
    """Stamp ``obj`` as created and updated at ``timestamp``."""
    # Chained assignment binds created_at first, then updated_at.
    obj.created_at = obj.updated_at = timestamp
|
||||
|
||||
|
||||
def update_logging_information(obj, timestamp):
    """Stamp ``obj`` as updated at ``timestamp``; created_at is left untouched."""
    setattr(obj, 'updated_at', timestamp)
|
||||
|
||||
|
||||
def get_extension_from_content_type(content_type):
    """Map an HTTP Content-Type value to a file extension.

    The value is normalised before lookup: parameters after ``;`` are
    dropped, surrounding whitespace is stripped and the media type is
    lower-cased.

    Args:
        content_type: Content-Type header value; may be empty or None.

    Returns:
        The matching extension, or ``'html'`` when the type is unknown.
    """
    content_type_map = {
        'text/html': 'html',
        'application/pdf': 'pdf',
        'text/plain': 'txt',
        'text/markdown': 'md',
        'application/msword': 'doc',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        'audio/mpeg': 'mp3',
        'audio/ogg': 'ogg',
        'video/mp4': 'mp4',
        'application/x-subrip': 'srt',
        # Add more mappings as needed
    }
    media_type = (content_type or '').split(';', 1)[0].strip().lower()
    return content_type_map.get(media_type, 'html')  # Default to 'html' if unknown
|
||||
|
||||
|
||||
def process_url(url, tenant_id):
    """Download ``url`` and derive a safe file name and extension for it.

    Args:
        url: The URL to fetch.
        tenant_id: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(file_content, filename, extension)`` where ``file_content``
        is the raw response body.

    Raises:
        EveAIDoubleURLException: If a DocumentVersion with this URL exists.
        requests.RequestException: On network errors, timeouts, or a failing
            status code of the download.
    """
    # Without a timeout a stalled server would block the caller indefinitely.
    request_timeout = 30  # seconds

    # Check if a document with this URL already exists before touching the network
    existing_doc = DocumentVersion.query.filter_by(url=url).first()
    if existing_doc:
        raise EveAIDoubleURLException

    response = requests.head(url, allow_redirects=True, timeout=request_timeout)
    content_type = response.headers.get('Content-Type', '').split(';')[0]

    # Determine file extension based on Content-Type
    extension = get_extension_from_content_type(content_type)

    # Generate filename
    parsed_url = urlparse(url)
    path = unquote(parsed_url.path)
    filename = os.path.basename(path)

    if not filename or '.' not in filename:
        # Use the last part of the path or a default name
        filename = path.strip('/').split('/')[-1] or 'document'
        filename = secure_filename(f"{filename}.{extension}")
    else:
        filename = secure_filename(filename)

    # Download the content
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()
    file_content = response.content

    return file_content, filename, extension
|
||||
|
||||
|
||||
def process_multiple_urls(urls, tenant_id, api_input):
    """Create a document stack and start embedding for every URL in ``urls``.

    A failure for one URL is logged and recorded in the result list; the
    remaining URLs are still processed.

    Returns:
        A list with one dict per URL: ``status`` is ``'success'`` (with
        document, version and task ids) or ``'error'`` (with ``message``).
    """
    outcomes = []
    for url in urls:
        try:
            content, filename, extension = process_url(url, tenant_id)

            per_url_input = api_input.copy()
            prefix = api_input['name']
            per_url_input['url'] = url
            per_url_input['name'] = f"{prefix}-{filename}" if prefix else filename

            doc, doc_version = create_document_stack(per_url_input, content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, doc_version.id)

            outcome = {
                'url': url,
                'document_id': doc.id,
                'document_version_id': doc_version.id,
                'task_id': task_id,
                'status': 'success',
            }
            outcomes.append(outcome)
        except Exception as exc:
            current_app.logger.error(f"Error processing URL {url}: {str(exc)}")
            outcomes.append({'url': url, 'status': 'error', 'message': str(exc)})
    return outcomes
|
||||
|
||||
|
||||
def prepare_youtube_document(url, tenant_id, api_input):
    """Create and commit a Document and DocumentVersion for a YouTube URL.

    No file is uploaded here; the version only gets the ``youtube`` file type
    plus its calculated file name and location.

    Args:
        url: The YouTube URL stored on the version.
        tenant_id: Identifier of the owning tenant.
        api_input: Mapping with ``language`` and ``user_context`` plus the
            fields read by ``create_document``.

    Returns:
        Tuple ``(new_doc, new_doc_vers)``.

    Raises:
        EveAIYoutubeError: Wraps any failure; the original exception is chained.
    """
    try:
        filename = "placeholder.youtube"
        extension = 'youtube'

        new_doc = create_document(api_input, filename, tenant_id)
        new_doc_vers = create_version_for_document(new_doc, url, api_input['language'], api_input['user_context'])

        new_doc_vers.file_type = extension
        new_doc_vers.file_name = new_doc_vers.calc_file_name()
        new_doc_vers.file_location = new_doc_vers.calc_file_location()

        db.session.add(new_doc)
        db.session.add(new_doc_vers)
        db.session.commit()

        return new_doc, new_doc_vers
    except Exception as e:
        # Leave the session usable for the next request after a failed flush/commit.
        db.session.rollback()
        raise EveAIYoutubeError(f"Error preparing YouTube document: {str(e)}") from e
|
||||
|
||||
|
||||
def start_embedding_task(tenant_id, doc_vers_id):
    """Send the ``create_embeddings`` task to the ``embeddings`` queue.

    Returns:
        The id of the dispatched task.
    """
    dispatched = current_celery.send_task(
        'create_embeddings',
        queue='embeddings',
        args=[tenant_id, doc_vers_id],
    )
    message = (f'Embedding creation started for tenant {tenant_id}, '
               f'Document Version {doc_vers_id}. '
               f'Embedding creation task: {dispatched.id}')
    current_app.logger.info(message)
    return dispatched.id
|
||||
|
||||
|
||||
def validate_file_type(extension):
    """Raise EveAIUnsupportedFileType unless ``extension`` is configured as supported.

    The allowed extensions come from ``current_app.config['SUPPORTED_FILE_TYPES']``.
    """
    log = current_app.logger
    log.debug(f'Validating file type {extension}')
    log.debug(f'Supported file types: {current_app.config["SUPPORTED_FILE_TYPES"]}')

    if extension in current_app.config['SUPPORTED_FILE_TYPES']:
        return

    supported_list = ', '.join(current_app.config['SUPPORTED_FILE_TYPES'])
    raise EveAIUnsupportedFileType(f"Filetype {extension} is currently not supported. "
                                   f"Supported filetypes: {supported_list}")
|
||||
43
common/utils/eveai_exceptions.py
Normal file
43
common/utils/eveai_exceptions.py
Normal file
@@ -0,0 +1,43 @@
|
||||
class EveAIException(Exception):
    """Base exception class for EveAI API.

    Carries a human-readable message, the HTTP status code to answer with and
    an optional payload that is merged into the serialised error.
    """

    def __init__(self, message, status_code=400, payload=None):
        # Pass the message to Exception so that str(exc) and exc.args carry
        # it; previously str(exc) was always the empty string.
        super().__init__(message)
        self.message = message
        self.status_code = status_code
        self.payload = payload

    def to_dict(self):
        """Return the payload entries (if any) plus a ``message`` key."""
        rv = dict(self.payload or ())
        rv['message'] = self.message
        return rv
|
||||
|
||||
|
||||
class EveAIInvalidLanguageException(EveAIException):
    """Raised when an invalid language is provided"""

    # Default message spelling corrected ("Langage" -> "Language").
    def __init__(self, message="Language is required", status_code=400, payload=None):
        super().__init__(message, status_code, payload)
|
||||
|
||||
|
||||
class EveAIDoubleURLException(EveAIException):
    """Signals that the submitted URL is already known."""

    def __init__(self, message="URL already exists", status_code=400, payload=None):
        super().__init__(message=message, status_code=status_code, payload=payload)
|
||||
|
||||
|
||||
class EveAIUnsupportedFileType(EveAIException):
    """Signals that the submitted file type is not accepted."""

    def __init__(self, message="Filetype is not supported", status_code=400, payload=None):
        super().__init__(message=message, status_code=status_code, payload=payload)
|
||||
|
||||
|
||||
class EveAIYoutubeError(EveAIException):
    """Signals that a Youtube document could not be added."""

    def __init__(self, message="Youtube document creation failed", status_code=400, payload=None):
        super().__init__(message=message, status_code=status_code, payload=payload)
|
||||
|
||||
# Add more custom exceptions as needed
|
||||
@@ -136,6 +136,10 @@ class Config(object):
|
||||
MAIL_PASSWORD = environ.get('MAIL_PASSWORD')
|
||||
MAIL_DEFAULT_SENDER = ('eveAI Admin', MAIL_USERNAME)
|
||||
|
||||
SUPPORTED_FILE_TYPES = ['pdf', 'html', 'md', 'txt', 'mp3', 'mp4', 'ogg', 'srt']
|
||||
|
||||
|
||||
|
||||
|
||||
class DevConfig(Config):
|
||||
DEVELOPMENT = True
|
||||
|
||||
@@ -60,6 +60,14 @@ LOGGING = {
|
||||
'backupCount': 10,
|
||||
'formatter': 'standard',
|
||||
},
|
||||
'file_api': {
|
||||
'level': 'DEBUG',
|
||||
'class': 'logging.handlers.RotatingFileHandler',
|
||||
'filename': 'logs/eveai_api.log',
|
||||
'maxBytes': 1024 * 1024 * 5, # 5MB
|
||||
'backupCount': 10,
|
||||
'formatter': 'standard',
|
||||
},
|
||||
'file_sqlalchemy': {
|
||||
'level': 'DEBUG',
|
||||
'class': 'logging.handlers.RotatingFileHandler',
|
||||
@@ -146,6 +154,11 @@ LOGGING = {
|
||||
'level': 'DEBUG',
|
||||
'propagate': False
|
||||
},
|
||||
'eveai_api': { # logger for the eveai_api
|
||||
'handlers': ['file_api', 'graylog', ] if env == 'production' else ['file_api', ],
|
||||
'level': 'DEBUG',
|
||||
'propagate': False
|
||||
},
|
||||
'sqlalchemy.engine': { # logger for the sqlalchemy
|
||||
'handlers': ['file_sqlalchemy', 'graylog', ] if env == 'production' else ['file_sqlalchemy', ],
|
||||
'level': 'DEBUG',
|
||||
|
||||
@@ -57,6 +57,9 @@ services:
|
||||
- ../nginx/sites-enabled:/etc/nginx/sites-enabled
|
||||
- ../nginx/static:/etc/nginx/static
|
||||
- ../nginx/public:/etc/nginx/public
|
||||
- ../integrations/Wordpress/eveai-chat-widget/css/eveai-chat-style.css:/etc/nginx/static/css/eveai-chat-style.css
|
||||
- ../integrations/Wordpress/eveai-chat-widget/js/eveai-chat-widget.js:/etc/nginx/static/js/eveai-chat-widget.js
|
||||
- ../integrations/Wordpress/eveai-chat-widget/js/eveai-sdk.js:/etc/nginx/static/js/eveai-sdk.js
|
||||
- ./logs/nginx:/var/log/nginx
|
||||
depends_on:
|
||||
- eveai_app
|
||||
|
||||
@@ -10,6 +10,9 @@ COPY ../../nginx/mime.types /etc/nginx/mime.types
|
||||
# Copy static & public files
|
||||
RUN mkdir -p /etc/nginx/static /etc/nginx/public
|
||||
COPY ../../nginx/static /etc/nginx/static
|
||||
COPY ../../integrations/Wordpress/eveai-chat-widget/css/eveai-chat-style.css /etc/nginx/static/css/
|
||||
COPY ../../integrations/Wordpress/eveai-chat-widget/js/eveai-chat-widget.js /etc/nginx/static/js/
|
||||
COPY ../../integrations/Wordpress/eveai-chat-widget/js/eveai-sdk.js /etc/nginx/static/js
|
||||
COPY ../../nginx/public /etc/nginx/public
|
||||
|
||||
# Copy site-specific configurations
|
||||
|
||||
@@ -1,4 +1,72 @@
|
||||
# from flask import Blueprint, request
|
||||
#
|
||||
# public_api_bp = Blueprint("public", __name__, url_prefix="/api/v1")
|
||||
# tenant_api_bp = Blueprint("tenant", __name__, url_prefix="/api/v1/tenant")
|
||||
from flask import Flask, jsonify
|
||||
from flask_jwt_extended import get_jwt_identity
|
||||
from common.extensions import db, api, jwt, minio_client
|
||||
import os
|
||||
import logging.config
|
||||
|
||||
from common.utils.database import Database
|
||||
from config.logging_config import LOGGING
|
||||
from .api.document_api import AddDocumentResource
|
||||
from .api.auth import TokenResource
|
||||
from config.config import get_config
|
||||
from common.utils.celery_utils import make_celery, init_celery
|
||||
from common.utils.eveai_exceptions import EveAIException
|
||||
|
||||
|
||||
def create_app(config_file=None):
    """Application factory for the eveai_api Flask application.

    Loads the configuration selected by the ``FLASK_ENV`` environment
    variable, sets up Celery and logging, registers the API resources and
    extensions, and installs the error and request hooks.

    Args:
        config_file: Currently unused; kept for interface compatibility.

    Returns:
        The configured Flask application.
    """
    # Imported locally so this fix does not depend on the module import block.
    from flask_jwt_extended import verify_jwt_in_request

    app = Flask(__name__)

    environment = os.getenv('FLASK_ENV', 'development')

    # Anything other than 'production' falls back to the development config.
    config_name = 'prod' if environment == 'production' else 'dev'
    app.config.from_object(get_config(config_name))

    app.config['SESSION_KEY_PREFIX'] = 'eveai_api_'

    app.celery = make_celery(app.name, app.config)
    init_celery(app.celery, app)

    logging.config.dictConfig(LOGGING)
    logger = logging.getLogger(__name__)

    logger.info("eveai_api starting up")

    # Register resources BEFORE the extensions: an Api created without an app
    # only queues resources, and the queue is bound in api.init_app(app).
    # Resources added afterwards would never get a route.
    register_api_resources()

    # Register Necessary Extensions
    register_extensions(app)

    # Error handler for the API
    @app.errorhandler(EveAIException)
    def handle_eveai_exception(error):
        response = jsonify(error.to_dict())
        response.status_code = error.status_code
        return response

    # Request hooks live on the Flask app; flask_restful's Api offers no
    # before_request decorator.
    @app.before_request
    def before_request():
        # optional=True lets unauthenticated endpoints (e.g. the token
        # endpoint) through; get_jwt_identity() then returns None.
        verify_jwt_in_request(optional=True)
        identity = get_jwt_identity()
        if identity is None:
            return

        # Accept both a bare tenant id and a {'tenant_id': ...} identity.
        tenant_id = identity.get('tenant_id') if isinstance(identity, dict) else identity

        # Switch to the correct schema
        Database(tenant_id).switch_schema()

    return app
|
||||
|
||||
|
||||
def register_extensions(app):
    """Bind the shared extension objects to ``app``, in a fixed order."""
    for extension in (db, api, jwt, minio_client):
        extension.init_app(app)
|
||||
|
||||
|
||||
def register_api_resources():
    """Attach every API resource class to its URL on the shared ``api`` object."""
    routes = (
        (AddDocumentResource, '/api/v1/documents/add_document'),
        (TokenResource, '/api/v1/token'),
    )
    for resource_cls, url in routes:
        api.add_resource(resource_cls, url)
|
||||
|
||||
24
eveai_api/api/auth.py
Normal file
24
eveai_api/api/auth.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from flask_restful import Resource, reqparse
|
||||
from flask_jwt_extended import create_access_token
|
||||
from common.models.user import Tenant
|
||||
from common.extensions import simple_encryption
|
||||
from flask import current_app
|
||||
|
||||
|
||||
class TokenResource(Resource):
    """Exchange a tenant id and API key for a JWT access token."""

    def post(self):
        """Validate the tenant's API key and issue an access token.

        Returns:
            ``({'access_token': ...}, 200)`` on success,
            ``({'message': ...}, 404)`` when the tenant does not exist,
            ``({'message': ...}, 401)`` when the key is missing or wrong.
        """
        import hmac

        parser = reqparse.RequestParser()
        parser.add_argument('tenant_id', type=int, required=True)
        parser.add_argument('api_key', type=str, required=True)
        args = parser.parse_args()

        tenant = Tenant.query.get(args['tenant_id'])
        if not tenant:
            return {'message': 'Tenant not found'}, 404

        # The key column is nullable: a tenant without a key can never authenticate.
        if not tenant.encrypted_api_key:
            return {'message': 'Invalid API key'}, 401

        decrypted_api_key = simple_encryption.decrypt_api_key(tenant.encrypted_api_key)
        # Constant-time comparison on bytes (compare_digest rejects non-ASCII
        # str) so the check does not leak key contents through timing.
        if not decrypted_api_key or not hmac.compare_digest(
                args['api_key'].encode('utf-8'), decrypted_api_key.encode('utf-8')):
            return {'message': 'Invalid API key'}, 401

        # The identity is the tenant id itself: the API resources use
        # get_jwt_identity() directly as the tenant id.
        access_token = create_access_token(identity=tenant.id)
        return {'access_token': access_token}, 200
|
||||
178
eveai_api/api/document_api.py
Normal file
178
eveai_api/api/document_api.py
Normal file
@@ -0,0 +1,178 @@
|
||||
from flask_restful import Resource, reqparse
|
||||
from flask import current_app
|
||||
from flask_jwt_extended import jwt_required, get_jwt_identity
|
||||
from werkzeug.datastructures import FileStorage
|
||||
from werkzeug.utils import secure_filename
|
||||
from common.utils.document_utils import (
|
||||
create_document_stack, process_url, start_embedding_task,
|
||||
validate_file_type, EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
|
||||
process_multiple_urls, prepare_youtube_document
|
||||
)
|
||||
from common.utils.eveai_exceptions import EveAIYoutubeError
|
||||
|
||||
|
||||
class AddDocumentResource(Resource):
    """API resource that adds an uploaded file as a new document."""

    @jwt_required()
    def post(self):
        """Create a document from the uploaded file and start embedding.

        Returns:
            A dict with the document, version and task ids and status 201 on
            success; ``{'message': ...}`` with 400 for an invalid language or
            unsupported file type; 500 for any other failure.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('file', type=FileStorage, location='files', required=True)
        parser.add_argument('name', type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        parser.add_argument('user_context', type=str, required=False)
        parser.add_argument('valid_from', type=str, required=False)
        args = parser.parse_args()

        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document for tenant {tenant_id}')

        try:
            file = args['file']
            filename = secure_filename(file.filename or '')
            # A name without a dot yields an empty extension, which the
            # validation below rejects with a 400 instead of an IndexError/500.
            _, dot, suffix = filename.rpartition('.')
            extension = suffix.lower() if dot else ''

            validate_file_type(extension)

            api_input = {
                'name': args['name'] or filename,
                'language': args['language'],
                'user_context': args['user_context'],
                'valid_from': args['valid_from']
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            # Use the exception's own message attribute; str(e) may be empty.
            return {'message': e.message}, 400
        except Exception as e:
            current_app.logger.error(f'Error adding document: {str(e)}')
            return {'message': 'Error adding document'}, 500
|
||||
|
||||
|
||||
class AddURLResource(Resource):
    """API resource that adds the content behind a URL as a new document."""

    @jwt_required()
    def post(self):
        """Download the URL, create a document from it and start embedding.

        Returns:
            A dict with the document, version and task ids and status 201 on
            success; ``{'message': ...}`` with 400 for a duplicate URL, an
            invalid language or an unsupported file type; 500 otherwise.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('url', type=str, required=True)
        parser.add_argument('name', type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        parser.add_argument('user_context', type=str, required=False)
        parser.add_argument('valid_from', type=str, required=False)
        args = parser.parse_args()

        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')

        try:
            file_content, filename, extension = process_url(args['url'], tenant_id)

            # Same gate as for uploaded files; without it the
            # EveAIUnsupportedFileType handler below could never trigger.
            validate_file_type(extension)

            api_input = {
                'url': args['url'],
                'name': args['name'] or filename,
                'language': args['language'],
                'user_context': args['user_context'],
                'valid_from': args['valid_from']
            }

            new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except EveAIDoubleURLException:
            return {'message': f'A document with URL {args["url"]} already exists.'}, 400
        except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            # Use the exception's own message attribute; str(e) may be empty.
            return {'message': e.message}, 400
        except Exception as e:
            current_app.logger.error(f'Error adding document from URL: {str(e)}')
            return {'message': 'Error adding document from URL'}, 500
|
||||
|
||||
|
||||
class AddMultipleURLsResource(Resource):
    """API resource that adds one document per submitted URL."""

    @jwt_required()
    def post(self):
        """Process every URL in ``urls`` and report the per-URL outcome.

        Returns:
            ``{'message': ..., 'results': [...]}`` with status 201, or a
            ``{'message': ...}`` dict with status 500 on an unexpected error.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('urls', type=str, action='append', required=True)
        for optional_field in ('name',):
            parser.add_argument(optional_field, type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        for optional_field in ('user_context', 'valid_from'):
            parser.add_argument(optional_field, type=str, required=False)
        args = parser.parse_args()

        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding multiple documents from URLs for tenant {tenant_id}')

        try:
            # Metadata shared by every URL of this request.
            shared_fields = ('name', 'language', 'user_context', 'valid_from')
            api_input = {field: args[field] for field in shared_fields}

            results = process_multiple_urls(args['urls'], tenant_id, api_input)
            body = {
                'message': 'Processing of multiple URLs completed',
                'results': results
            }
            return body, 201

        except Exception as exc:
            current_app.logger.error(f'Error adding documents from URLs: {str(exc)}')
            return {'message': 'Error adding documents from URLs'}, 500
|
||||
|
||||
|
||||
class AddYoutubeResource(Resource):
    """API resource that registers a YouTube URL as a new document."""

    @jwt_required()
    def post(self):
        """Create a placeholder document for the URL and start embedding.

        Returns:
            A dict with the document, version and task ids and status 201 on
            success; ``{'message': ...}`` with 400 for EveAI-specific
            failures; 500 for any other failure.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('url', type=str, required=True)
        parser.add_argument('name', type=str, required=False)
        parser.add_argument('language', type=str, required=True)
        parser.add_argument('user_context', type=str, required=False)
        parser.add_argument('valid_from', type=str, required=False)
        args = parser.parse_args()

        tenant_id = get_jwt_identity()
        current_app.logger.info(f'Adding YouTube document for tenant {tenant_id}')

        try:
            api_input = {
                'name': args['name'],
                'language': args['language'],
                'user_context': args['user_context'],
                'valid_from': args['valid_from']
            }

            new_doc, new_doc_vers = prepare_youtube_document(args['url'], tenant_id, api_input)
            task_id = start_embedding_task(tenant_id, new_doc_vers.id)

            return {
                'message': f'Processing on YouTube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
                'document_id': new_doc.id,
                'document_version_id': new_doc_vers.id,
                'task_id': task_id
            }, 201

        except (EveAIYoutubeError, EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
            # All three map to the same 400 response. Use the exception's own
            # message attribute; str(e) may be empty.
            return {'message': e.message}, 400
        except Exception as e:
            current_app.logger.error(f'Error adding YouTube document: {str(e)}')
            return {'message': 'Error adding YouTube document'}, 500
|
||||
|
||||
|
||||
# You can add more API resources here as needed
|
||||
@@ -1,7 +0,0 @@
|
||||
from flask import request
|
||||
from flask.views import MethodView
|
||||
|
||||
class RegisterAPI(MethodView):
|
||||
def post(self):
|
||||
username = request.json['username']
|
||||
|
||||
@@ -27,7 +27,6 @@ def create_app(config_file=None):
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1)
|
||||
|
||||
environment = os.getenv('FLASK_ENV', 'development')
|
||||
print(environment)
|
||||
|
||||
match environment:
|
||||
case 'development':
|
||||
@@ -49,8 +48,6 @@ def create_app(config_file=None):
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
logger.info("eveai_app starting up")
|
||||
logger.debug("start config")
|
||||
logger.debug(app.config)
|
||||
|
||||
# Register extensions
|
||||
|
||||
@@ -95,14 +92,11 @@ def create_app(config_file=None):
|
||||
}
|
||||
return jsonify(response), 500
|
||||
|
||||
@app.before_request
|
||||
def before_request():
|
||||
# app.logger.debug(f"Before request - Session ID: {session.sid}")
|
||||
app.logger.debug(f"Before request - Session data: {session}")
|
||||
app.logger.debug(f"Before request - Request headers: {request.headers}")
|
||||
|
||||
# Register API
|
||||
register_api(app)
|
||||
# @app.before_request
|
||||
# def before_request():
|
||||
# # app.logger.debug(f"Before request - Session ID: {session.sid}")
|
||||
# app.logger.debug(f"Before request - Session data: {session}")
|
||||
# app.logger.debug(f"Before request - Request headers: {request.headers}")
|
||||
|
||||
# Register template filters
|
||||
register_filters(app)
|
||||
@@ -138,9 +132,3 @@ def register_blueprints(app):
|
||||
app.register_blueprint(security_bp)
|
||||
from .views.interaction_views import interaction_bp
|
||||
app.register_blueprint(interaction_bp)
|
||||
|
||||
|
||||
def register_api(app):
|
||||
pass
|
||||
# from . import api
|
||||
# app.register_blueprint(api.bp, url_prefix='/api')
|
||||
|
||||
@@ -1,126 +1,80 @@
|
||||
{% extends "base.html" %}
|
||||
{% from "macros.html" import render_field %}
|
||||
|
||||
{% block title %}Session Overview{% endblock %}
|
||||
|
||||
{% block content_title %}Session Overview{% endblock %}
|
||||
{% block content_description %}An overview of the chat session.{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container mt-5">
|
||||
<h2>Chat Session Details</h2>
|
||||
<!-- Session Information -->
|
||||
<div class="card mb-4">
|
||||
<div class="card-header">
|
||||
<h5>Session Information</h5>
|
||||
<!-- Timezone Toggle Buttons -->
|
||||
<div class="btn-group" role="group">
|
||||
<button type="button" class="btn btn-primary" id="toggle-interaction-timezone">Interaction Timezone</button>
|
||||
<button type="button" class="btn btn-secondary" id="toggle-admin-timezone">Admin Timezone</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<dl class="row">
|
||||
<dt class="col-sm-3">Session ID:</dt>
|
||||
<dd class="col-sm-9">{{ chat_session.session_id }}</dd>
|
||||
|
||||
<dt class="col-sm-3">Session Start:</dt>
|
||||
<dd class="col-sm-9">
|
||||
<span class="timezone interaction-timezone">{{ chat_session.session_start | to_local_time(chat_session.timezone) }}</span>
|
||||
<span class="timezone admin-timezone d-none">{{ chat_session.session_start | to_local_time(session['admin_user_timezone']) }}</span>
|
||||
</dd>
|
||||
|
||||
<dt class="col-sm-3">Session End:</dt>
|
||||
<dd class="col-sm-9">
|
||||
{% if chat_session.session_end %}
|
||||
<span class="timezone interaction-timezone">{{ chat_session.session_end | to_local_time(chat_session.timezone) }}</span>
|
||||
<span class="timezone admin-timezone d-none">{{ chat_session.session_end | to_local_time(session['admin_user_timezone']) }}</span>
|
||||
{% else %}
|
||||
Ongoing
|
||||
{% endif %}
|
||||
</dd>
|
||||
</dl>
|
||||
<p><strong>Session ID:</strong> {{ chat_session.session_id }}</p>
|
||||
<p><strong>User:</strong> {{ chat_session.user.user_name if chat_session.user else 'Anonymous' }}</p>
|
||||
<p><strong>Start:</strong> {{ chat_session.session_start | to_local_time(chat_session.timezone) }}</p>
|
||||
<p><strong>End:</strong> {{ chat_session.session_end | to_local_time(chat_session.timezone) if chat_session.session_end else 'Ongoing' }}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Interactions List -->
|
||||
<div class="card mb-4">
|
||||
<div class="card-header">
|
||||
<h5>Interactions</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
{% for interaction in interactions %}
|
||||
<div class="interaction mb-3">
|
||||
<div class="card">
|
||||
<div class="card-header d-flex justify-content-between">
|
||||
<span>Question:</span>
|
||||
<span class="text-muted">
|
||||
<span class="timezone interaction-timezone">{{ interaction.question_at | to_local_time(interaction.timezone) }}</span>
|
||||
<span class="timezone admin-timezone d-none">{{ interaction.question_at | to_local_time(session['admin_user_timezone']) }}</span>
|
||||
-
|
||||
<span class="timezone interaction-timezone">{{ interaction.answer_at | to_local_time(interaction.timezone) }}</span>
|
||||
<span class="timezone admin-timezone d-none">{{ interaction.answer_at | to_local_time(session['admin_user_timezone']) }}</span>
|
||||
({{ interaction.question_at | time_difference(interaction.answer_at) }})
|
||||
</span>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p><strong>Question:</strong> {{ interaction.question }}</p>
|
||||
<p><strong>Answer:</strong> {{ interaction.answer }}</p>
|
||||
<p>
|
||||
<strong>Algorithm Used:</strong>
|
||||
<i class="material-icons {{ 'fingerprint-rag-' ~ interaction.algorithm_used.lower() }}">
|
||||
fingerprint
|
||||
</i> {{ interaction.algorithm_used }}
|
||||
</p>
|
||||
<p>
|
||||
<strong>Appreciation:</strong>
|
||||
<i class="material-icons thumb-icon {{ 'thumb_up' if interaction.appreciation == 1 else 'thumb_down' }}">
|
||||
{{ 'thumb_up' if interaction.appreciation == 1 else 'thumb_down' }}
|
||||
</i>
|
||||
</p>
|
||||
<p><strong>Embeddings:</strong>
|
||||
{% if interaction.embeddings %}
|
||||
{% for embedding in interaction.embeddings %}
|
||||
<a href="{{ url_for('interaction_bp.view_embedding', embedding_id=embedding.embedding_id) }}" class="badge badge-info">
|
||||
{{ embedding.embedding_id }}
|
||||
</a>
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
None
|
||||
{% endif %}
|
||||
</p>
|
||||
</div>
|
||||
<h3>Interactions</h3>
|
||||
<div class="accordion" id="interactionsAccordion">
|
||||
{% for interaction in interactions %}
|
||||
<div class="accordion-item">
|
||||
<h2 class="accordion-header" id="heading{{ loop.index }}">
|
||||
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse"
|
||||
data-bs-target="#collapse{{ loop.index }}" aria-expanded="false"
|
||||
aria-controls="collapse{{ loop.index }}">
|
||||
<div class="d-flex justify-content-between align-items-center w-100">
|
||||
<span class="interaction-question">{{ interaction.question | truncate(50) }}</span>
|
||||
<span class="interaction-icons">
|
||||
<i class="material-icons algorithm-icon {{ interaction.algorithm_used | lower }}">fingerprint</i>
|
||||
<i class="material-icons thumb-icon {% if interaction.appreciation == 100 %}filled{% else %}outlined{% endif %}">thumb_up</i>
|
||||
<i class="material-icons thumb-icon {% if interaction.appreciation == 0 %}filled{% else %}outlined{% endif %}">thumb_down</i>
|
||||
</span>
|
||||
</div>
|
||||
</button>
|
||||
</h2>
|
||||
<div id="collapse{{ loop.index }}" class="accordion-collapse collapse" aria-labelledby="heading{{ loop.index }}"
|
||||
data-bs-parent="#interactionsAccordion">
|
||||
<div class="accordion-body">
|
||||
<h6>Detailed Question:</h6>
|
||||
<p>{{ interaction.detailed_question }}</p>
|
||||
<h6>Answer:</h6>
|
||||
<div class="markdown-content">{{ interaction.answer | safe }}</div>
|
||||
{% if embeddings_dict.get(interaction.id) %}
|
||||
<h6>Related Documents:</h6>
|
||||
<ul>
|
||||
{% for embedding in embeddings_dict[interaction.id] %}
|
||||
<li>
|
||||
{% if embedding.url %}
|
||||
<a href="{{ embedding.url }}" target="_blank">{{ embedding.url }}</a>
|
||||
{% else %}
|
||||
{{ embedding.file_name }}
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
// Elements to toggle
|
||||
const interactionTimes = document.querySelectorAll('.interaction-timezone');
|
||||
const adminTimes = document.querySelectorAll('.admin-timezone');
|
||||
|
||||
// Buttons
|
||||
const interactionButton = document.getElementById('toggle-interaction-timezone');
|
||||
const adminButton = document.getElementById('toggle-admin-timezone');
|
||||
|
||||
// Toggle to Interaction Timezone
|
||||
interactionButton.addEventListener('click', function() {
|
||||
interactionTimes.forEach(el => el.classList.remove('d-none'));
|
||||
adminTimes.forEach(el => el.classList.add('d-none'));
|
||||
interactionButton.classList.add('btn-primary');
|
||||
interactionButton.classList.remove('btn-secondary');
|
||||
adminButton.classList.add('btn-secondary');
|
||||
adminButton.classList.remove('btn-primary');
|
||||
});
|
||||
|
||||
// Toggle to Admin Timezone
|
||||
adminButton.addEventListener('click', function() {
|
||||
interactionTimes.forEach(el => el.classList.add('d-none'));
|
||||
adminTimes.forEach(el => el.classList.remove('d-none'));
|
||||
interactionButton.classList.add('btn-secondary');
|
||||
interactionButton.classList.remove('btn-primary');
|
||||
adminButton.classList.add('btn-primary');
|
||||
adminButton.classList.remove('btn-secondary');
|
||||
var markdownElements = document.querySelectorAll('.markdown-content');
|
||||
markdownElements.forEach(function(el) {
|
||||
el.innerHTML = marked.parse(el.textContent);
|
||||
});
|
||||
});
|
||||
</script>
|
||||
|
||||
@@ -84,7 +84,6 @@
|
||||
{'name': 'Add Document', 'url': '/document/add_document', 'roles': ['Super User', 'Tenant Admin']},
|
||||
{'name': 'Add URL', 'url': '/document/add_url', 'roles': ['Super User', 'Tenant Admin']},
|
||||
{'name': 'Add a list of URLs', 'url': '/document/add_urls', 'roles': ['Super User', 'Tenant Admin']},
|
||||
{'name': 'Add Youtube Document' , 'url': '/document/add_youtube', 'roles': ['Super User', 'Tenant Admin']},
|
||||
{'name': 'All Documents', 'url': '/document/documents', 'roles': ['Super User', 'Tenant Admin']},
|
||||
{'name': 'All Document Versions', 'url': '/document/document_versions_list', 'roles': ['Super User', 'Tenant Admin']},
|
||||
{'name': 'Library Operations', 'url': '/document/library_operations', 'roles': ['Super User', 'Tenant Admin']},
|
||||
|
||||
@@ -62,12 +62,19 @@
|
||||
{{ render_included_field(field, disabled_fields=license_fields, include_fields=license_fields) }}
|
||||
{% endfor %}
|
||||
<!-- Register API Key Button -->
|
||||
<button type="button" class="btn btn-primary" onclick="generateNewChatApiKey()">Register Chat API Key</button>
|
||||
<button type="button" class="btn btn-primary" onclick="generateNewApiKey()">Register API Key</button>
|
||||
<!-- API Key Display Field -->
|
||||
<div id="chat-api-key-field" style="display:none;">
|
||||
<label for="chat-api-key">Chat API Key:</label>
|
||||
<input type="text" id="chat-api-key" class="form-control" readonly>
|
||||
<button type="button" id="copy-chat-button" class="btn btn-primary">Copy to Clipboard</button>
|
||||
<p id="copy-chat-message" style="display:none;color:green;">Chat API key copied to clipboard</p>
|
||||
</div>
|
||||
<div id="api-key-field" style="display:none;">
|
||||
<label for="api-key">API Key:</label>
|
||||
<input type="text" id="api-key" class="form-control" readonly>
|
||||
<button type="button" id="copy-button" class="btn btn-primary">Copy to Clipboard</button>
|
||||
<button type="button" id="copy-api-button" class="btn btn-primary">Copy to Clipboard</button>
|
||||
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -105,14 +112,25 @@
|
||||
|
||||
{% block scripts %}
|
||||
<script>
|
||||
/**
 * Request a fresh Chat API key and reveal it in the chat key field.
 * Delegates the request and DOM update to generateApiKey().
 */
function generateNewChatApiKey() {
    const endpoint = '/admin/user/generate_chat_api_key';
    const inputSelector = '#chat-api-key';
    const fieldSelector = '#chat-api-key-field';
    generateApiKey(endpoint, inputSelector, fieldSelector);
}
|
||||
|
||||
/**
 * Request a fresh general API key and reveal it in the API key field.
 * Delegates the request and DOM update to generateApiKey().
 */
function generateNewApiKey() {
    // NOTE(review): the path contains "api_api" - confirm it matches the server route.
    const endpoint = '/admin/user/generate_api_api_key';
    const inputSelector = '#api-key';
    const fieldSelector = '#api-key-field';
    generateApiKey(endpoint, inputSelector, fieldSelector);
}
|
||||
|
||||
// Reusable function to handle API key generation
|
||||
function generateApiKey(url, inputSelector, fieldSelector) {
|
||||
$.ajax({
|
||||
url: '/user/generate_chat_api_key',
|
||||
url: url,
|
||||
type: 'POST',
|
||||
contentType: 'application/json',
|
||||
success: function(response) {
|
||||
$('#api-key').val(response.api_key);
|
||||
$('#api-key-field').show();
|
||||
$(inputSelector).val(response.api_key);
|
||||
$(fieldSelector).show();
|
||||
},
|
||||
error: function(error) {
|
||||
alert('Error generating new API key: ' + error.responseText);
|
||||
@@ -120,25 +138,27 @@
|
||||
});
|
||||
}
|
||||
|
||||
function copyToClipboard(selector) {
|
||||
// Function to copy text to clipboard
|
||||
function copyToClipboard(selector, messageSelector) {
|
||||
const element = document.querySelector(selector);
|
||||
if (element) {
|
||||
const text = element.value;
|
||||
if (navigator.clipboard && navigator.clipboard.writeText) {
|
||||
navigator.clipboard.writeText(text).then(function() {
|
||||
showCopyMessage();
|
||||
showCopyMessage(messageSelector);
|
||||
}).catch(function(error) {
|
||||
alert('Failed to copy text: ' + error);
|
||||
});
|
||||
} else {
|
||||
fallbackCopyToClipboard(text);
|
||||
fallbackCopyToClipboard(text, messageSelector);
|
||||
}
|
||||
} else {
|
||||
console.error('Element not found for selector:', selector);
|
||||
}
|
||||
}
|
||||
|
||||
function fallbackCopyToClipboard(text) {
|
||||
// Fallback method for copying text to clipboard
|
||||
function fallbackCopyToClipboard(text, messageSelector) {
|
||||
const textArea = document.createElement('textarea');
|
||||
textArea.value = text;
|
||||
document.body.appendChild(textArea);
|
||||
@@ -146,15 +166,16 @@
|
||||
textArea.select();
|
||||
try {
|
||||
document.execCommand('copy');
|
||||
showCopyMessage();
|
||||
showCopyMessage(messageSelector);
|
||||
} catch (err) {
|
||||
alert('Fallback: Oops, unable to copy', err);
|
||||
}
|
||||
document.body.removeChild(textArea);
|
||||
}
|
||||
|
||||
function showCopyMessage() {
|
||||
const message = document.getElementById('copy-message');
|
||||
// Function to show copy confirmation message
|
||||
function showCopyMessage(messageSelector) {
|
||||
const message = document.querySelector(messageSelector);
|
||||
if (message) {
|
||||
message.style.display = 'block';
|
||||
setTimeout(function() {
|
||||
@@ -163,8 +184,13 @@
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById('copy-button').addEventListener('click', function() {
|
||||
copyToClipboard('#api-key');
|
||||
// Event listeners for copy buttons
|
||||
document.getElementById('copy-chat-button').addEventListener('click', function() {
|
||||
copyToClipboard('#chat-api-key', '#copy-chat-message');
|
||||
});
|
||||
|
||||
document.getElementById('copy-api-button').addEventListener('click', function() {
|
||||
copyToClipboard('#api-key', '#copy-message');
|
||||
});
|
||||
</script>
|
||||
<script>
|
||||
|
||||
@@ -1,14 +1,21 @@
|
||||
from flask import session
|
||||
from flask import session, current_app
|
||||
from flask_wtf import FlaskForm
|
||||
from wtforms import (StringField, BooleanField, SubmitField, DateField,
|
||||
SelectField, FieldList, FormField, TextAreaField, URLField)
|
||||
from wtforms.validators import DataRequired, Length, Optional, URL
|
||||
from wtforms.validators import DataRequired, Length, Optional, URL, ValidationError
|
||||
from flask_wtf.file import FileField, FileAllowed, FileRequired
|
||||
|
||||
|
||||
def allowed_file(form, field):
    """WTForms-style validator rejecting uploads whose extension is not supported.

    The accepted extensions are read from the ``SUPPORTED_FILE_TYPES`` entry of
    the current application's config (an empty list when the entry is absent).
    A field without data is not checked.

    Raises:
        ValidationError: if the file name has no extension or the lower-cased
            extension is not in the configured list.
    """
    upload = field.data
    if not upload:
        return

    name = upload.filename
    supported = current_app.config.get('SUPPORTED_FILE_TYPES', [])

    # Accept only names that carry an extension listed in the configuration.
    if '.' in name and name.rsplit('.', 1)[1].lower() in supported:
        return

    raise ValidationError('Unsupported file type.')
|
||||
|
||||
|
||||
class AddDocumentForm(FlaskForm):
|
||||
file = FileField('File', validators=[FileAllowed(['pdf', 'txt', 'html']),
|
||||
FileRequired()])
|
||||
file = FileField('File', validators=[FileRequired(), allowed_file])
|
||||
name = StringField('Name', validators=[Length(max=100)])
|
||||
language = SelectField('Language', choices=[], validators=[Optional()])
|
||||
user_context = TextAreaField('User Context', validators=[Optional()])
|
||||
@@ -20,6 +27,7 @@ class AddDocumentForm(FlaskForm):
|
||||
super().__init__()
|
||||
self.language.choices = [(language, language) for language in
|
||||
session.get('tenant').get('allowed_languages')]
|
||||
self.language.data = session.get('tenant').get('default_language')
|
||||
|
||||
|
||||
class AddURLForm(FlaskForm):
|
||||
@@ -35,6 +43,7 @@ class AddURLForm(FlaskForm):
|
||||
super().__init__()
|
||||
self.language.choices = [(language, language) for language in
|
||||
session.get('tenant').get('allowed_languages')]
|
||||
self.language.data = session.get('tenant').get('default_language')
|
||||
|
||||
|
||||
class AddURLsForm(FlaskForm):
|
||||
@@ -50,6 +59,7 @@ class AddURLsForm(FlaskForm):
|
||||
super().__init__()
|
||||
self.language.choices = [(language, language) for language in
|
||||
session.get('tenant').get('allowed_languages')]
|
||||
self.language.data = session.get('tenant').get('default_language')
|
||||
|
||||
|
||||
class AddYoutubeForm(FlaskForm):
|
||||
|
||||
@@ -18,8 +18,11 @@ from minio.error import S3Error
|
||||
|
||||
from common.models.document import Document, DocumentVersion
|
||||
from common.extensions import db, minio_client
|
||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddYoutubeForm, \
|
||||
AddURLsForm
|
||||
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
|
||||
process_multiple_urls, prepare_youtube_document, create_version_for_document, upload_file_for_version
|
||||
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
|
||||
EveAIDoubleURLException, EveAIYoutubeError
|
||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm, AddURLsForm
|
||||
from common.utils.middleware import mw_before_request
|
||||
from common.utils.celery_utils import current_celery
|
||||
from common.utils.nginx_utils import prefixed_url_for
|
||||
@@ -56,30 +59,37 @@ def before_request():
|
||||
@roles_accepted('Super User', 'Tenant Admin')
|
||||
def add_document():
|
||||
form = AddDocumentForm()
|
||||
current_app.logger.debug('Adding document')
|
||||
|
||||
# If the form is submitted
|
||||
if form.validate_on_submit():
|
||||
current_app.logger.info(f'Adding document for tenant {session["tenant"]["id"]}')
|
||||
file = form.file.data
|
||||
filename = secure_filename(file.filename)
|
||||
extension = filename.rsplit('.', 1)[1].lower()
|
||||
form_dict = form_to_dict(form)
|
||||
try:
|
||||
current_app.logger.debug('Validating file type')
|
||||
tenant_id = session['tenant']['id']
|
||||
file = form.file.data
|
||||
filename = secure_filename(file.filename)
|
||||
extension = filename.rsplit('.', 1)[1].lower()
|
||||
|
||||
new_doc, new_doc_vers = create_document_stack(form_dict, file, filename, extension)
|
||||
validate_file_type(extension)
|
||||
|
||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
||||
session['tenant']['id'],
|
||||
new_doc_vers.id,
|
||||
])
|
||||
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
|
||||
f'Document Version {new_doc_vers.id}. '
|
||||
f'Embedding creation task: {task.id}')
|
||||
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
|
||||
'success')
|
||||
api_input = {
|
||||
'name': form.name.data,
|
||||
'language': form.language.data,
|
||||
'user_context': form.user_context.data,
|
||||
'valid_from': form.valid_from.data
|
||||
}
|
||||
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
else:
|
||||
form_validation_failed(request, form)
|
||||
new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
|
||||
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
|
||||
|
||||
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
|
||||
'success')
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
|
||||
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
|
||||
flash(str(e), 'error')
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Error adding document: {str(e)}')
|
||||
flash('An error occurred while adding the document.', 'error')
|
||||
|
||||
return render_template('document/add_document.html', form=form)
|
||||
|
||||
@@ -90,192 +100,74 @@ def add_url():
|
||||
form = AddURLForm()
|
||||
|
||||
if form.validate_on_submit():
|
||||
current_app.logger.info(f'Adding url for tenant {session["tenant"]["id"]}')
|
||||
url = form.url.data
|
||||
|
||||
try:
|
||||
response = requests.head(url, allow_redirects=True)
|
||||
content_type = response.headers.get('Content-Type', '').split(';')[0]
|
||||
tenant_id = session['tenant']['id']
|
||||
url = form.url.data
|
||||
|
||||
# Determine file extension based on Content-Type
|
||||
extension = get_extension_from_content_type(content_type)
|
||||
file_content, filename, extension = process_url(url, tenant_id)
|
||||
|
||||
# Generate filename
|
||||
parsed_url = urlparse(url)
|
||||
path = unquote(parsed_url.path)
|
||||
filename = os.path.basename(path)
|
||||
api_input = {
|
||||
'name': form.name.data or filename,
|
||||
'url': url,
|
||||
'language': form.language.data,
|
||||
'user_context': form.user_context.data,
|
||||
'valid_from': form.valid_from.data
|
||||
}
|
||||
|
||||
if not filename or '.' not in filename:
|
||||
# Use the last part of the path or a default name
|
||||
filename = path.strip('/').split('/')[-1] or 'document'
|
||||
filename = secure_filename(f"{filename}.{extension}")
|
||||
else:
|
||||
filename = secure_filename(filename)
|
||||
new_doc, new_doc_vers = create_document_stack(api_input, file_content, filename, extension, tenant_id)
|
||||
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
|
||||
|
||||
# Check if a document with this URL already exists
|
||||
existing_doc = DocumentVersion.query.filter_by(url=url).first()
|
||||
if existing_doc:
|
||||
flash(f'A document with URL {url} already exists. No new document created.', 'info')
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
|
||||
# Download the content
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
file_content = response.content
|
||||
|
||||
# Create document and document version
|
||||
form_dict = form_to_dict(form)
|
||||
new_doc, new_doc_vers = create_document_stack(form_dict, file_content, filename, extension)
|
||||
|
||||
# Upload file to storage
|
||||
minio_client.upload_document_file(
|
||||
session['tenant']['id'],
|
||||
new_doc_vers.doc_id,
|
||||
new_doc_vers.language,
|
||||
new_doc_vers.id,
|
||||
filename,
|
||||
file_content
|
||||
)
|
||||
|
||||
# Start embedding task
|
||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
||||
session['tenant']['id'],
|
||||
new_doc_vers.id,
|
||||
])
|
||||
|
||||
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
|
||||
f'Document Version {new_doc_vers.id}. '
|
||||
f'Embedding creation task: {task.id}')
|
||||
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
|
||||
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
|
||||
'success')
|
||||
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
|
||||
except requests.RequestException as e:
|
||||
current_app.logger.error(f'Error fetching URL {url}: {str(e)}')
|
||||
flash(f'Error fetching URL: {str(e)}', 'danger')
|
||||
except SQLAlchemyError as e:
|
||||
current_app.logger.error(f'Database error: {str(e)}')
|
||||
flash('An error occurred while saving the document.', 'danger')
|
||||
except EveAIDoubleURLException:
|
||||
flash(f'A document with url {url} already exists. No new document created.', 'info')
|
||||
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
|
||||
flash(str(e), 'error')
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Unexpected error: {str(e)}')
|
||||
flash('An unexpected error occurred.', 'danger')
|
||||
current_app.logger.error(f'Error adding document: {str(e)}')
|
||||
flash('An error occurred while adding the document.', 'error')
|
||||
|
||||
return render_template('document/add_url.html', form=form)
|
||||
|
||||
|
||||
def get_extension_from_content_type(content_type):
    """Return the file extension (without dot) for an HTTP ``Content-Type`` value.

    Args:
        content_type: Media type such as ``'application/pdf'``. Letter case,
            surrounding whitespace and any ``;``-separated parameters
            (e.g. ``'; charset=utf-8'``) are ignored. ``None`` and ``''`` are
            treated as unknown.

    Returns:
        The mapped extension, or ``'html'`` when the media type is unknown.
    """
    content_type_map = {
        'text/html': 'html',
        'application/pdf': 'pdf',
        'text/plain': 'txt',
        'application/msword': 'doc',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
        # Add more mappings as needed
    }
    # Media types compare case-insensitively and may carry parameters, so
    # normalize before the lookup instead of requiring an exact match.
    media_type = (content_type or '').split(';', 1)[0].strip().lower()
    return content_type_map.get(media_type, 'html')  # Default to 'html' if unknown
|
||||
|
||||
|
||||
@document_bp.route('/add_urls', methods=['GET', 'POST'])
|
||||
@roles_accepted('Super User', 'Tenant Admin')
|
||||
def add_urls():
|
||||
form = AddURLsForm()
|
||||
|
||||
if form.validate_on_submit():
|
||||
urls = form.urls.data.split('\n')
|
||||
urls = [url.strip() for url in urls if url.strip()]
|
||||
try:
|
||||
tenant_id = session['tenant']['id']
|
||||
urls = form.urls.data.split('\n')
|
||||
urls = [url.strip() for url in urls if url.strip()]
|
||||
|
||||
for i, url in enumerate(urls):
|
||||
try:
|
||||
doc_vers = DocumentVersion.query.filter_by(url=url).all()
|
||||
if doc_vers:
|
||||
current_app.logger.info(f'A document with url {url} already exists. No new document created.')
|
||||
flash(f'A document with url {url} already exists. No new document created.', 'info')
|
||||
continue
|
||||
api_input = {
|
||||
'name': form.name.data,
|
||||
'language': form.language.data,
|
||||
'user_context': form.user_context.data,
|
||||
'valid_from': form.valid_from.data
|
||||
}
|
||||
|
||||
html = fetch_html(url)
|
||||
file = io.BytesIO(html)
|
||||
results = process_multiple_urls(urls, tenant_id, api_input)
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
path_parts = parsed_url.path.split('/')
|
||||
filename = path_parts[-1] if path_parts[-1] else 'index'
|
||||
if not filename.endswith('.html'):
|
||||
filename += '.html'
|
||||
for result in results:
|
||||
if result['status'] == 'success':
|
||||
flash(
|
||||
f"Processed URL: {result['url']} - Document ID: {result['document_id']}, Version ID: {result['document_version_id']}",
|
||||
'success')
|
||||
else:
|
||||
flash(f"Error processing URL: {result['url']} - {result['message']}", 'error')
|
||||
|
||||
# Use the name prefix if provided, otherwise use the filename
|
||||
doc_name = f"{form.name.data}-{filename}" if form.name.data else filename
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
|
||||
new_doc, new_doc_vers = create_document_stack({
|
||||
'name': doc_name,
|
||||
'url': url,
|
||||
'language': form.language.data,
|
||||
'user_context': form.user_context.data,
|
||||
'valid_from': form.valid_from.data
|
||||
}, file, filename, 'html')
|
||||
|
||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
||||
session['tenant']['id'],
|
||||
new_doc_vers.id,
|
||||
])
|
||||
current_app.logger.info(f'Embedding creation started for tenant {session["tenant"]["id"]}, '
|
||||
f'Document Version {new_doc_vers.id}. '
|
||||
f'Embedding creation task: {task.id}')
|
||||
flash(f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
|
||||
'success')
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error processing URL {url}: {str(e)}")
|
||||
flash(f'Error processing URL {url}: {str(e)}', 'danger')
|
||||
|
||||
return redirect(prefixed_url_for('document_bp.documents'))
|
||||
else:
|
||||
form_validation_failed(request, form)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f'Error adding multiple URLs: {str(e)}')
|
||||
flash('An error occurred while adding the URLs.', 'error')
|
||||
|
||||
return render_template('document/add_urls.html', form=form)
|
||||
|
||||
@document_bp.route('/add_youtube', methods=['GET', 'POST'])
@roles_accepted('Super User', 'Tenant Admin')
def add_youtube():
    """Show the "add Youtube document" form and register the document on a valid submit.

    When the form does not validate, ``form_validation_failed`` is called and the
    form is rendered again. When a document version with the same URL already
    exists, nothing is created. Otherwise a document stack is created from a
    placeholder file and a ``create_embeddings`` task is queued; both of those
    outcomes redirect to the documents overview.
    """
    form = AddYoutubeForm()

    if not form.validate_on_submit():
        form_validation_failed(request, form)
        return render_template('document/add_youtube.html', form=form)

    current_app.logger.info(f'Adding Youtube document for tenant {session["tenant"]["id"]}')
    url = form.url.data
    current_app.logger.debug(f'Value of language field: {form.language.data}')

    # A URL that is already attached to a document version is not added again.
    if DocumentVersion.query.filter_by(url=url).all():
        duplicate_msg = f'A document with url {url} already exists. No new document created.'
        current_app.logger.info(duplicate_msg)
        flash(duplicate_msg, 'info')
        return redirect(prefixed_url_for('document_bp.documents'))

    # As downloading a Youtube document can take quite some time, the download
    # is offloaded to the worker; a simple placeholder file keeps the document
    # stack creation uniform with the other upload paths.
    placeholder_file = "Youtube placeholder file"
    placeholder_name = 'placeholder.youtube'
    placeholder_type = 'youtube'

    form_dict = form_to_dict(form)
    current_app.logger.debug(f'Form data: {form_dict}')

    new_doc, new_doc_vers = create_document_stack(form_dict, placeholder_file, placeholder_name, placeholder_type)

    task = current_celery.send_task(
        'create_embeddings',
        queue='embeddings',
        args=[
            session['tenant']['id'],
            new_doc_vers.id,
        ],
    )
    current_app.logger.info(f'Processing and Embedding on Youtube document started for tenant '
                            f'{session["tenant"]["id"]}, '
                            f'Document Version {new_doc_vers.id}. '
                            f'Processing and Embedding Youtube task: {task.id}')
    flash(f'Processing on Youtube document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task.id}.',
          'success')

    return redirect(prefixed_url_for('document_bp.documents'))
|
||||
|
||||
|
||||
@document_bp.route('/documents', methods=['GET', 'POST'])
|
||||
@roles_accepted('Super User', 'Tenant Admin')
|
||||
@@ -487,7 +379,7 @@ def refresh_document(doc_id):
|
||||
current_app.logger.info(f'Document added successfully for tenant {session["tenant"]["id"]}, '
|
||||
f'Document Version {new_doc_vers.id}')
|
||||
|
||||
upload_file_for_version(new_doc_vers, file, extension)
|
||||
upload_file_for_version(new_doc_vers, file, extension, session["tenant"]["id"])
|
||||
|
||||
task = current_celery.send_task('create_embeddings', queue='embeddings', args=[
|
||||
session['tenant']['id'],
|
||||
@@ -535,116 +427,11 @@ def update_logging_information(obj, timestamp):
|
||||
obj.updated_by = current_user.id
|
||||
|
||||
|
||||
def create_document_stack(form, file, filename, extension):
    """Create a Document with its first DocumentVersion, persist both and upload the file.

    Args:
        form: Mapping with the submitted values; ``url``, ``language`` and
            ``user_context`` are read with defaults, the rest is consumed by
            ``create_document``.
        file: Content handed to ``upload_file_for_version``.
        filename: Original file name, used by ``create_document`` as a name fallback.
        extension: File type passed to ``upload_file_for_version``.

    Returns:
        Tuple ``(new_doc, new_doc_vers)``.

    Raises:
        SQLAlchemyError: re-raised after logging, flashing and rolling back.
        Exception: any other error during the commit is logged and re-raised.
    """
    # Create the Document
    new_doc = create_document(form, filename)

    # Create the DocumentVersion
    new_doc_vers = create_version_for_document(
        new_doc,
        form.get('url', ''),
        form.get('language', 'en'),
        form.get('user_context', ''),
    )

    try:
        db.session.add(new_doc)
        db.session.add(new_doc_vers)
        db.session.commit()
    except SQLAlchemyError as e:
        current_app.logger.error(f'Error adding document for tenant {session["tenant"]["id"]}: {e}')
        flash('Error adding document.', 'alert')
        db.session.rollback()
        raise
    except Exception as e:
        # Keep the failure details in the log instead of a bare 'Unknown error'.
        current_app.logger.error(f'Unknown error adding document for tenant {session["tenant"]["id"]}: {e}')
        raise

    # Log the version id: the label says "Document Version", so the document id was misleading.
    current_app.logger.info(f'Document added successfully for tenant {session["tenant"]["id"]}, '
                            f'Document Version {new_doc_vers.id}')

    upload_file_for_version(new_doc_vers, file, extension)

    return new_doc, new_doc_vers
|
||||
|
||||
|
||||
def log_session_state(session, msg=""):
    """Write the ``dirty`` and ``new`` attributes of ``session`` to the debug log.

    Args:
        session: Object exposing ``dirty`` and ``new`` attributes
            (presumably a SQLAlchemy session; verify against callers).
        msg: Prefix placed in front of both log lines.
    """
    debug = current_app.logger.debug
    debug(f"{msg} - Session dirty: {session.dirty}")
    debug(f"{msg} - Session new: {session.new}")
|
||||
|
||||
|
||||
def create_document(form, filename):
    """Build (but do not persist) a Document for the current tenant.

    Args:
        form: Mapping with optional ``name`` and ``valid_from`` entries.
        filename: File name whose stem is used when no name is supplied.

    Returns:
        The new ``Document`` with name, validity start, tenant id and logging
        information filled in.
    """
    new_doc = Document()

    # A missing, None or empty name falls back to the file name without its
    # extension; the former `== ''` test let None through and raised KeyError
    # when the key was absent.
    name = form.get('name')
    new_doc.name = name if name else filename.rsplit('.', 1)[0]

    # Same fallback rule for the validity start: default to "now" in UTC.
    valid_from = form.get('valid_from')
    new_doc.valid_from = valid_from if valid_from else dt.now(tz.utc)

    new_doc.tenant_id = session['tenant']['id']
    set_logging_information(new_doc, dt.now(tz.utc))

    return new_doc
|
||||
|
||||
|
||||
def create_version_for_document(document, url, language, user_context):
    """Build an unsaved DocumentVersion attached to ``document``.

    Args:
        document: Document the new version is linked to.
        url: Source URL; left unset on the version when it is ``''``.
        language: Language code; ``''`` selects ``session['default_language']``.
        user_context: Free text; left unset on the version when it is ``''``.

    Returns:
        The new ``DocumentVersion``. It is not added to the database session here.
    """
    version = DocumentVersion()

    if url != '':
        version.url = url

    version.language = session['default_language'] if language == '' else language

    if user_context != '':
        version.user_context = user_context

    version.document = document
    set_logging_information(version, dt.now(tz.utc))

    return version
|
||||
|
||||
|
||||
def upload_file_for_version(doc_vers, file, extension):
    """Store ``file`` in MinIO for ``doc_vers`` and commit the version's file metadata.

    Args:
        doc_vers: Document version whose ``file_type``, ``file_name`` and
            ``file_location`` are filled in here.
        file: File content handed to ``minio_client.upload_document_file``.
        extension: File extension stored as ``doc_vers.file_type``.

    Raises:
        S3Error: If the upload fails.
        SQLAlchemyError: If committing the metadata fails.
        In both cases the session is rolled back and a message is flashed
        before the exception is re-raised.
    """
    doc_vers.file_type = extension
    doc_vers.file_name = doc_vers.calc_file_name()
    doc_vers.file_location = doc_vers.calc_file_location()

    # Normally, the tenant bucket should exist. But let's be on the safe side if a migration took place.
    tenant_id = session['tenant']['id']
    minio_client.create_tenant_bucket(tenant_id)

    try:
        minio_client.upload_document_file(
            tenant_id,
            doc_vers.doc_id,
            doc_vers.language,
            doc_vers.id,
            doc_vers.file_name,
            file,
        )
        db.session.commit()
        current_app.logger.info(f'Successfully saved document to MinIO for tenant {tenant_id} for '
                                f'document version {doc_vers.id} while uploading file.')
    except (S3Error, SQLAlchemyError) as exc:
        db.session.rollback()
        if isinstance(exc, S3Error):
            flash('Error saving document to MinIO.', 'error')
            current_app.logger.error(
                f'Error saving document to MinIO for tenant {tenant_id}: {exc}')
        else:
            flash('Error saving document metadata.', 'error')
            current_app.logger.error(
                f'Error saving document metadata for tenant {tenant_id}: {exc}')
        raise
|
||||
|
||||
|
||||
def fetch_html(url):
|
||||
# Fetches HTML content from a URL
|
||||
try:
|
||||
|
||||
@@ -15,7 +15,8 @@ from requests.exceptions import SSLError
|
||||
from urllib.parse import urlparse
|
||||
import io
|
||||
|
||||
from common.models.interaction import ChatSession, Interaction
|
||||
from common.models.document import Embedding, DocumentVersion
|
||||
from common.models.interaction import ChatSession, Interaction, InteractionEmbedding
|
||||
from common.extensions import db
|
||||
from .document_forms import AddDocumentForm, AddURLForm, EditDocumentForm, EditDocumentVersionForm
|
||||
from common.utils.middleware import mw_before_request
|
||||
@@ -80,11 +81,34 @@ def handle_chat_session_selection():
|
||||
return redirect(prefixed_url_for('interaction_bp.chat_sessions'))
|
||||
|
||||
|
||||
@interaction_bp.route('/view_chat_session/<int:chat_session_id>', methods=['GET'])
@roles_accepted('Super User', 'Tenant Admin')
def view_chat_session(chat_session_id):
    """Render a chat session with its interactions and the documents they used.

    Args:
        chat_session_id: Primary key of the chat session (integer route
            parameter).

    Returns:
        The rendered ``interaction/view_chat_session.html`` template. It
        receives the chat session, its interactions ordered by
        ``question_at``, and ``embeddings_dict``, which maps an interaction
        id to a list of ``{'url': ..., 'file_name': ...}`` entries.
        An unknown id aborts with 404.
    """
    chat_session = ChatSession.query.get_or_404(chat_session_id)
    interactions = (Interaction.query
                    .filter_by(chat_session_id=chat_session.id)
                    .order_by(Interaction.question_at)
                    .all())

    embeddings_dict = {}
    interaction_ids = [interaction.id for interaction in interactions]

    # Skip the lookup for sessions without interactions; an empty IN () list
    # cannot match anything.
    if interaction_ids:
        # Fetch all related embeddings for the interactions in this session
        embedding_query = (db.session.query(InteractionEmbedding.interaction_id,
                                            DocumentVersion.url,
                                            DocumentVersion.file_name)
                           .join(Embedding, InteractionEmbedding.embedding_id == Embedding.id)
                           .join(DocumentVersion, Embedding.doc_vers_id == DocumentVersion.id)
                           .filter(InteractionEmbedding.interaction_id.in_(interaction_ids)))

        # Group the document references per interaction
        for interaction_id, url, file_name in embedding_query:
            embeddings_dict.setdefault(interaction_id, []).append(
                {'url': url, 'file_name': file_name})

    return render_template('interaction/view_chat_session.html',
                           chat_session=chat_session,
                           interactions=interactions,
                           embeddings_dict=embeddings_dict)
|
||||
|
||||
|
||||
@interaction_bp.route('/view_chat_session_by_session_id/<session_id>', methods=['GET'])
|
||||
|
||||
@@ -435,6 +435,36 @@ def generate_chat_api_key():
|
||||
tenant.encrypted_chat_api_key = simple_encryption.encrypt_api_key(new_api_key)
|
||||
update_logging_information(tenant, dt.now(tz.utc))
|
||||
|
||||
try:
|
||||
db.session.add(tenant)
|
||||
db.session.commit()
|
||||
except SQLAlchemyError as e:
|
||||
db.session.rollback()
|
||||
current_app.logger.error(f'Unable to store chat api key for tenant {tenant.id}. Error: {str(e)}')
|
||||
|
||||
return jsonify({'api_key': new_api_key}), 200
|
||||
|
||||
|
||||
@user_bp.route('/check_api_api_key', methods=['POST'])
@roles_accepted('Super User', 'Tenant Admin')
def check_api_api_key():
    """Tell the client whether the session's tenant has an encrypted API key stored.

    Returns:
        JSON ``{'api_key_exists': <bool>}``. An unknown tenant aborts with 404.
    """
    tenant = Tenant.query.get_or_404(session['tenant']['id'])
    return jsonify({'api_key_exists': bool(tenant.encrypted_api_key)})
|
||||
|
||||
|
||||
@user_bp.route('/generate_api_api_key', methods=['POST'])
|
||||
@roles_accepted('Super User', 'Tenant Admin')
|
||||
def generate_api_api_key():
|
||||
tenant = Tenant.query.get_or_404(session['tenant']['id'])
|
||||
|
||||
new_api_key = generate_api_key(prefix="EveAI-API")
|
||||
tenant.encrypted_api_key = simple_encryption.encrypt_api_key(new_api_key)
|
||||
update_logging_information(tenant, dt.now(tz.utc))
|
||||
|
||||
try:
|
||||
db.session.add(tenant)
|
||||
db.session.commit()
|
||||
|
||||
187
eveai_workers/Processors/audio_processor.py
Normal file
187
eveai_workers/Processors/audio_processor.py
Normal file
@@ -0,0 +1,187 @@
|
||||
import io
|
||||
import os
|
||||
from pydub import AudioSegment
|
||||
import tempfile
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from common.extensions import minio_client
|
||||
from common.utils.model_utils import create_language_template
|
||||
from .processor import Processor
|
||||
import subprocess
|
||||
|
||||
|
||||
class AudioProcessor(Processor):
    """Processor that converts an uploaded audio/video file into markdown.

    The stored file is compressed to MP3 with ffmpeg, transcribed in
    10-minute segments through the configured transcription client, and the
    transcript is then rewritten as markdown by the configured LLM.
    """

    def __init__(self, tenant, model_variables, document_version):
        """Store the transcription settings found in ``model_variables``.

        Raises:
            KeyError: If ``transcription_client`` or ``transcription_model``
                is missing from ``model_variables``.
        """
        super().__init__(tenant, model_variables, document_version)
        self.transcription_client = model_variables['transcription_client']
        self.transcription_model = model_variables['transcription_model']
        self.ffmpeg_path = 'ffmpeg'

    def process(self):
        """Run the full pipeline for the document version.

        Returns:
            Tuple ``(markdown, title)``.

        Raises:
            Exception: Any failure is logged at error level and re-raised.
        """
        self._log("Starting Audio processing")
        try:
            file_data = minio_client.download_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                self.document_version.file_name
            )

            compressed_audio = self._compress_audio(file_data)
            transcription = self._transcribe_audio(compressed_audio)
            markdown, title = self._generate_markdown_from_transcription(transcription)

            self._save_markdown(markdown)
            self._log("Finished processing Audio")
            return markdown, title
        except Exception as e:
            self._log(f"Error processing Audio: {str(e)}", level='error')
            raise

    def _compress_audio(self, audio_data):
        """Re-encode ``audio_data`` as 64 kbit/s MP3 and store the result in MinIO.

        Args:
            audio_data: Raw bytes of the uploaded file.

        Returns:
            The compressed MP3 as bytes.

        Raises:
            Exception: If ffmpeg exits with a non-zero status; the message
                contains ffmpeg's stderr and the original error is chained.
        """
        self._log("Compressing audio")

        temp_input = tempfile.NamedTemporaryFile(
            delete=False, suffix=f'.{self.document_version.file_type}')
        input_path = temp_input.name

        # Use a unique filename for the output to avoid conflicts
        output_filename = f'compressed_{os.urandom(8).hex()}.mp3'
        output_path = os.path.join(tempfile.gettempdir(), output_filename)

        try:
            # Close the input before ffmpeg opens it by name so the data is
            # fully written and the file is not held open by this process.
            with temp_input:
                temp_input.write(audio_data)

            subprocess.run(
                [self.ffmpeg_path, '-y', '-i', input_path, '-b:a', '64k', '-f', 'mp3', output_path],
                capture_output=True,
                text=True,
                check=True
            )

            with open(output_path, 'rb') as f:
                compressed_data = f.read()

            # Save compressed audio to MinIO
            compressed_filename = f"{self.document_version.id}_compressed.mp3"
            minio_client.upload_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                compressed_filename,
                compressed_data
            )
            self._log(f"Saved compressed audio to MinIO: {compressed_filename}")

            return compressed_data

        except subprocess.CalledProcessError as e:
            error_message = f"Compression failed: {e.stderr}"
            self._log(error_message, level='error')
            raise Exception(error_message) from e

        finally:
            # Clean up temporary files, also when writing the input failed
            os.unlink(input_path)
            if os.path.exists(output_path):
                os.unlink(output_path)

    def _transcribe_audio(self, audio_data):
        """Transcribe MP3 bytes in 10-minute segments and store the text in MinIO.

        A segment whose transcription call fails is logged and skipped, so
        the result may be partial. When nothing could be transcribed, the
        placeholder ``"No transcription available."`` is returned and stored.

        Args:
            audio_data: MP3 bytes, as produced by ``_compress_audio``.

        Returns:
            The transcribed segments joined with single spaces.
        """
        self._log("Starting audio transcription")
        audio = AudioSegment.from_file(io.BytesIO(audio_data), format="mp3")

        segment_length = 10 * 60 * 1000  # 10 minutes in milliseconds
        # Ceiling division: an exact multiple must not announce an extra chunk.
        total_chunks = (len(audio) + segment_length - 1) // segment_length
        transcriptions = []

        for i, chunk in enumerate(audio[::segment_length]):
            self._log(f'Processing chunk {i + 1} of {total_chunks}')

            # Only reserve a unique path; the chunk is written to it by name.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
                chunk_path = temp_audio.name

            try:
                chunk.export(chunk_path, format="mp3")
                text = self._transcribe_chunk_file(chunk_path)
            finally:
                os.unlink(chunk_path)

            if text:
                transcriptions.append(text)

        full_transcription = " ".join(transcriptions)

        if not full_transcription:
            self._log("Warning: No transcription was generated", level='warning')
            full_transcription = "No transcription available."

        # Save transcription to MinIO
        transcription_filename = f"{self.document_version.id}_transcription.txt"
        minio_client.upload_document_file(
            self.tenant.id,
            self.document_version.doc_id,
            self.document_version.language,
            self.document_version.id,
            transcription_filename,
            full_transcription.encode('utf-8')
        )
        self._log(f"Saved transcription to MinIO: {transcription_filename}")

        return full_transcription

    def _transcribe_chunk_file(self, path):
        """Send one exported chunk to the transcription API.

        Returns:
            The transcribed text, or ``None`` when the API returned nothing
            or the call failed (best effort: failures are logged, not raised).
        """
        try:
            file_size = os.path.getsize(path)
            self._log(f"Temporary audio file size: {file_size} bytes")

            with open(path, 'rb') as audio_file:
                file_start = audio_file.read(100)
                self._log(f"First 100 bytes of audio file: {file_start}")
                audio_file.seek(0)  # Reset file pointer to the beginning

                self._log("Calling transcription API")
                transcription = self.transcription_client.audio.transcriptions.create(
                    file=audio_file,
                    model=self.transcription_model,
                    language=self.document_version.language,
                    response_format='verbose_json',
                )
                self._log("Transcription API call completed")

            if not transcription:
                self._log("Warning: Received empty transcription", level='warning')
                return None
            return self._transcription_to_text(transcription)

        except Exception as e:
            self._log(f"Error during transcription: {str(e)}", level='error')
            return None

    def _transcription_to_text(self, transcription):
        """Extract the text from an API result, whatever its type."""
        if isinstance(transcription, str):
            self._log(f"Transcription result (string): {transcription[:100]}...")
            return transcription
        if hasattr(transcription, 'text'):
            self._log(
                f"Transcription result (object with 'text' attribute): {transcription.text[:100]}...")
            return transcription.text
        self._log(f"Transcription result (unknown type): {str(transcription)[:100]}...")
        return str(transcription)

    def _generate_markdown_from_transcription(self, transcription):
        """Let the LLM rewrite the transcript as markdown.

        Uses ``model_variables['llm']`` and the ``transcript_template``
        adapted to the document version's language.

        Returns:
            Tuple ``(markdown, title)``.
        """
        self._log("Generating markdown from transcription")
        llm = self.model_variables['llm']
        template = self.model_variables['transcript_template']
        language_template = create_language_template(template, self.document_version.language)
        transcript_prompt = ChatPromptTemplate.from_template(language_template)
        setup = RunnablePassthrough()
        output_parser = StrOutputParser()

        chain = setup | transcript_prompt | llm | output_parser

        input_transcript = {'transcript': transcription}
        markdown = chain.invoke(input_transcript)

        # Extract title from the markdown
        title = self._extract_title_from_markdown(markdown)

        return markdown, title

    def _extract_title_from_markdown(self, markdown):
        """Return the text of the first ``# `` header, or a fixed fallback title."""
        for line in markdown.split('\n'):
            if line.startswith('# '):
                return line[2:].strip()
        return "Untitled Audio Transcription"
|
||||
142
eveai_workers/Processors/html_processor.py
Normal file
142
eveai_workers/Processors/html_processor.py
Normal file
@@ -0,0 +1,142 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from common.extensions import db, minio_client
|
||||
from common.utils.model_utils import create_language_template
|
||||
from .processor import Processor
|
||||
|
||||
|
||||
class HTMLProcessor(Processor):
    """Processor that extracts the relevant parts of an HTML file as markdown.

    Elements are selected by tag name, filtered by excluded elements and
    excluded classes, and the remaining content is converted to markdown by
    the configured LLM in size-limited chunks.
    """

    def __init__(self, tenant, model_variables, document_version):
        """Store the HTML selection settings found in ``model_variables``.

        Raises:
            KeyError: If one of the ``html_*`` settings is missing.
        """
        super().__init__(tenant, model_variables, document_version)
        self.html_tags = model_variables['html_tags']
        self.html_end_tags = model_variables['html_end_tags']
        self.html_included_elements = model_variables['html_included_elements']
        self.html_excluded_elements = model_variables['html_excluded_elements']

    def process(self):
        """Download, parse and convert the HTML file of the document version.

        Returns:
            Tuple ``(markdown, title)``.

        Raises:
            Exception: Any failure is logged at error level and re-raised.
        """
        self._log("Starting HTML processing")
        try:
            file_data = minio_client.download_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                self.document_version.file_name
            )
            html_content = file_data.decode('utf-8')

            extracted_html, title = self._parse_html(html_content)
            markdown = self._generate_markdown_from_html(extracted_html)

            self._save_markdown(markdown)
            self._log("Finished processing HTML")
            return markdown, title
        except Exception as e:
            self._log(f"Error processing HTML: {str(e)}", level='error')
            raise

    def _parse_html(self, html_content):
        """Reduce ``html_content`` to the wanted elements and read the page title.

        Returns:
            Tuple ``(extracted_html, title)``. ``extracted_html`` holds one
            ``<tag>text</tag>`` line per retained element; ``title`` is
            ``''`` when the page has no ``<title>``.
        """
        self._log(f'Parsing HTML for tenant {self.tenant.id}')
        soup = BeautifulSoup(html_content, 'html.parser')
        excluded_classes = self._parse_excluded_classes(self.tenant.html_excluded_classes)

        if self.html_included_elements:
            elements_to_parse = soup.find_all(self.html_included_elements)
        else:
            elements_to_parse = [soup]

        extracted_parts = []
        for element in elements_to_parse:
            for sub_element in element.find_all(self.html_tags):
                if self._should_exclude_element(sub_element, excluded_classes):
                    continue
                extracted_parts.append(self._extract_element_content(sub_element))
        extracted_html = ''.join(extracted_parts)

        title_tag = soup.find('title')
        title = title_tag.get_text(strip=True) if title_tag else ''

        self._log(f'Finished parsing HTML for tenant {self.tenant.id}')
        return extracted_html, title

    def _generate_markdown_from_html(self, html_content):
        """Convert ``html_content`` to markdown, one LLM call per chunk.

        Returns:
            The markdown of all chunks, separated by blank lines.
        """
        self._log(f'Generating markdown from HTML for tenant {self.tenant.id}')

        llm = self.model_variables['llm']
        template = self.model_variables['html_parse_template']
        parse_prompt = ChatPromptTemplate.from_template(template)
        setup = RunnablePassthrough()
        output_parser = StrOutputParser()
        chain = setup | parse_prompt | llm | output_parser

        soup = BeautifulSoup(html_content, 'lxml')
        chunks = self._split_content(soup)

        markdown_chunks = []
        for chunk in chunks:
            if self.embed_tuning:
                self._log(f'Processing chunk: \n{chunk}\n')
            input_html = {"html": chunk}
            markdown_chunk = chain.invoke(input_html)
            markdown_chunks.append(markdown_chunk)
            if self.embed_tuning:
                self._log(f'Processed markdown chunk: \n{markdown_chunk}\n')

        markdown = "\n\n".join(markdown_chunks)
        self._log(f'Finished generating markdown from HTML for tenant {self.tenant.id}')
        return markdown

    def _split_content(self, soup, max_size=20000):
        """Group heading/text/table elements into HTML chunks of about ``max_size`` characters.

        A chunk is closed before adding an element that would push it over
        ``max_size``. A single oversized element still forms its own chunk.

        Returns:
            List of HTML strings.
        """
        chunks = []
        current_chunk = []
        current_size = 0

        for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'table']):
            element_html = str(element)
            element_size = len(element_html)

            if current_size + element_size > max_size and current_chunk:
                chunks.append(''.join(map(str, current_chunk)))
                current_chunk = []
                current_size = 0

            current_chunk.append(element)
            current_size += element_size

            if element.name in ['h1', 'h2', 'h3'] and current_size > max_size:
                chunks.append(''.join(map(str, current_chunk)))
                current_chunk = []
                current_size = 0

        if current_chunk:
            chunks.append(''.join(map(str, current_chunk)))

        return chunks

    def _parse_excluded_classes(self, excluded_classes):
        """Turn ``element.class`` rules into a ``{element: {classes}}`` mapping.

        Args:
            excluded_classes: Iterable of rule strings, or ``None``/empty
                when the tenant has no exclusions configured.

        Returns:
            Dict mapping an element name to the set of its excluded classes.
            Rules without a ``.`` separator are logged and ignored instead
            of aborting the whole document.
        """
        parsed = {}
        for rule in excluded_classes or ():
            element, separator, cls = rule.partition('.')
            if not separator:
                self._log(f'Ignoring malformed excluded class rule: {rule}', level='warning')
                continue
            parsed.setdefault(element, set()).add(cls)
        return parsed

    def _should_exclude_element(self, element, excluded_classes):
        """Return True when ``element`` lies inside an excluded element or matches a class rule."""
        if self.html_excluded_elements and element.find_parent(self.html_excluded_elements):
            return True
        return self._is_element_excluded_by_class(element, excluded_classes)

    def _is_element_excluded_by_class(self, element, excluded_classes):
        """Return True when ``element`` or any of its ancestors matches a class rule."""
        for parent in element.parents:
            if self._element_matches_exclusion(parent, excluded_classes):
                return True
        return self._element_matches_exclusion(element, excluded_classes)

    def _element_matches_exclusion(self, element, excluded_classes):
        """Return True when a class of ``element`` is excluded for ``*`` or for its tag name."""
        element_classes = element.get('class', [])
        for selector in ('*', element.name):
            if selector in excluded_classes and \
                    any(cls in excluded_classes[selector] for cls in element_classes):
                return True
        return False

    def _extract_element_content(self, element):
        """Return ``element`` as ``<tag>text</tag>`` with its text pieces joined by spaces."""
        content = ' '.join(element.stripped_strings)
        return f'<{element.name}>{content}</{element.name}>\n'
|
||||
@@ -5,29 +5,23 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
import re
|
||||
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
|
||||
from common.extensions import minio_client
|
||||
from common.utils.model_utils import create_language_template
|
||||
from .processor import Processor
|
||||
|
||||
|
||||
class PDFProcessor:
|
||||
class PDFProcessor(Processor):
|
||||
def __init__(self, tenant, model_variables, document_version):
|
||||
self.tenant = tenant
|
||||
self.model_variables = model_variables
|
||||
self.document_version = document_version
|
||||
|
||||
# Configuration parameters from model_variables
|
||||
super().__init__(tenant, model_variables, document_version)
|
||||
# PDF-specific initialization
|
||||
self.chunk_size = model_variables['PDF_chunk_size']
|
||||
self.chunk_overlap = model_variables['PDF_chunk_overlap']
|
||||
self.min_chunk_size = model_variables['PDF_min_chunk_size']
|
||||
self.max_chunk_size = model_variables['PDF_max_chunk_size']
|
||||
|
||||
# Set tuning variable for easy use
|
||||
self.embed_tuning = model_variables['embed_tuning']
|
||||
|
||||
def process_pdf(self):
|
||||
def process(self):
|
||||
self._log("Starting PDF processing")
|
||||
try:
|
||||
file_data = minio_client.download_document_file(
|
||||
@@ -51,11 +45,6 @@ class PDFProcessor:
|
||||
self._log(f"Error processing PDF: {str(e)}", level='error')
|
||||
raise
|
||||
|
||||
def _log(self, message, level='debug'):
|
||||
logger = current_app.logger
|
||||
log_method = getattr(logger, level)
|
||||
log_method(f"PDFProcessor - Tenant {self.tenant.id}, Document {self.document_version.id}: {message}")
|
||||
|
||||
def _extract_content(self, file_data):
|
||||
extracted_content = []
|
||||
with pdfplumber.open(io.BytesIO(file_data)) as pdf:
|
||||
@@ -248,24 +237,3 @@ class PDFProcessor:
|
||||
markdown_chunks.append(result)
|
||||
|
||||
return "\n\n".join(markdown_chunks)
|
||||
|
||||
def _save_markdown(self, markdown):
|
||||
markdown_filename = f"{self.document_version.id}.md"
|
||||
minio_client.upload_document_file(
|
||||
self.tenant.id,
|
||||
self.document_version.doc_id,
|
||||
self.document_version.language,
|
||||
self.document_version.id,
|
||||
markdown_filename,
|
||||
markdown.encode('utf-8')
|
||||
)
|
||||
|
||||
def _save_intermediate(self, content, filename):
|
||||
minio_client.upload_document_file(
|
||||
self.tenant.id,
|
||||
self.document_version.doc_id,
|
||||
self.document_version.language,
|
||||
self.document_version.id,
|
||||
filename,
|
||||
content.encode('utf-8')
|
||||
)
|
||||
42
eveai_workers/Processors/processor.py
Normal file
42
eveai_workers/Processors/processor.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from flask import current_app
|
||||
from common.extensions import minio_client
|
||||
|
||||
|
||||
class Processor(ABC):
    """Abstract base for the file-type specific document processors.

    Holds the tenant, the model settings and the document version being
    processed, and offers shared logging and MinIO upload helpers.
    """

    def __init__(self, tenant, model_variables, document_version):
        """Keep references to the processing context.

        Raises:
            KeyError: If ``model_variables`` has no ``embed_tuning`` entry.
        """
        self.tenant = tenant
        self.model_variables = model_variables
        self.document_version = document_version
        self.embed_tuning = model_variables['embed_tuning']

    @abstractmethod
    def process(self):
        """Process the document version; concrete processors must implement this."""

    def _save_markdown(self, markdown):
        """Upload ``markdown`` as ``<document version id>.md``."""
        self._upload_text(f"{self.document_version.id}.md", markdown)

    def _log(self, message, level='debug'):
        """Log ``message`` via the Flask app logger, prefixed with class, tenant and document ids.

        Args:
            message: Text to log.
            level: Name of the logger method to call (e.g. ``'debug'``, ``'error'``).
        """
        emit = getattr(current_app.logger, level)
        emit(
            f"{self.__class__.__name__} - Tenant {self.tenant.id}, Document {self.document_version.id}: {message}")

    def _save_intermediate(self, content, filename):
        """Upload the text ``content`` under ``filename`` next to the document version."""
        self._upload_text(filename, content)

    def _upload_text(self, filename, text):
        """UTF-8 encode ``text`` and upload it to MinIO for the current document version."""
        version = self.document_version
        minio_client.upload_document_file(
            self.tenant.id,
            version.doc_id,
            version.language,
            version.id,
            filename,
            text.encode('utf-8')
        )
|
||||
80
eveai_workers/Processors/srt_processor.py
Normal file
80
eveai_workers/Processors/srt_processor.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import re
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from common.extensions import minio_client
|
||||
from common.utils.model_utils import create_language_template
|
||||
from .processor import Processor
|
||||
|
||||
|
||||
class SRTProcessor(Processor):
    """Processor that turns an SRT subtitle file into markdown.

    Cue numbers and timecodes are stripped, and the remaining text is
    rewritten as markdown by the configured LLM.
    """

    # Timecode line such as "00:00:01,000 --> 00:00:04,000"; compiled once.
    _TIMECODE_RE = re.compile(r'\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}')

    def process(self):
        """Download, clean and convert the SRT file of the document version.

        Returns:
            Tuple ``(markdown, title)``.

        Raises:
            Exception: Any failure is logged at error level and re-raised.
        """
        self._log("Starting SRT processing")
        try:
            file_data = minio_client.download_document_file(
                self.tenant.id,
                self.document_version.doc_id,
                self.document_version.language,
                self.document_version.id,
                self.document_version.file_name
            )

            # 'utf-8-sig' drops a leading BOM, which would otherwise keep the
            # first cue number from being recognised as a number.
            srt_content = file_data.decode('utf-8-sig')
            cleaned_transcription = self._clean_srt(srt_content)
            markdown, title = self._generate_markdown_from_transcription(cleaned_transcription)

            self._save_markdown(markdown)
            self._log("Finished processing SRT")
            return markdown, title
        except Exception as e:
            self._log(f"Error processing SRT: {str(e)}", level='error')
            raise

    def _clean_srt(self, srt_content):
        """Return the spoken text of ``srt_content`` as one line.

        Empty lines, lines made up of digits only (cue numbers) and timecode
        lines are dropped; the rest is joined with single spaces.
        """
        cleaned_lines = []
        for raw_line in srt_content.split('\n'):
            line = raw_line.strip()
            # Test the stripped line so indented timecodes are removed too.
            if not line or line.isdigit() or self._TIMECODE_RE.match(line):
                continue
            cleaned_lines.append(line)

        # Collapse any remaining runs of whitespace inside the lines
        return re.sub(r'\s+', ' ', ' '.join(cleaned_lines)).strip()

    def _generate_markdown_from_transcription(self, transcription):
        """Let the LLM rewrite the transcript as markdown.

        Uses ``model_variables['llm']`` and the ``transcript_template``
        adapted to the document version's language.

        Returns:
            Tuple ``(markdown, title)``.
        """
        self._log("Generating markdown from transcription")
        llm = self.model_variables['llm']
        template = self.model_variables['transcript_template']
        language_template = create_language_template(template, self.document_version.language)
        transcript_prompt = ChatPromptTemplate.from_template(language_template)
        setup = RunnablePassthrough()
        output_parser = StrOutputParser()

        chain = setup | transcript_prompt | llm | output_parser

        input_transcript = {'transcript': transcription}
        markdown = chain.invoke(input_transcript)

        # Extract title from the markdown
        title = self._extract_title_from_markdown(markdown)

        return markdown, title

    def _extract_title_from_markdown(self, markdown):
        """Return the text of the first ``# `` header, or a fixed fallback title."""
        for line in markdown.split('\n'):
            if line.startswith('# '):
                return line[2:].strip()
        return "Untitled SRT Transcription"
|
||||
|
||||
|
||||
@@ -1,26 +1,16 @@
|
||||
import io
|
||||
import os
|
||||
from datetime import datetime as dt, timezone as tz
|
||||
import subprocess
|
||||
|
||||
import gevent
|
||||
from bs4 import BeautifulSoup
|
||||
import html
|
||||
from celery import states
|
||||
from flask import current_app
|
||||
# OpenAI imports
|
||||
from langchain.chains.summarize import load_summarize_chain
|
||||
from langchain.text_splitter import CharacterTextSplitter, MarkdownHeaderTextSplitter
|
||||
from langchain_core.documents import Document
|
||||
from langchain.text_splitter import MarkdownHeaderTextSplitter
|
||||
from langchain_core.exceptions import LangChainException
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from pytube import YouTube
|
||||
import PyPDF2
|
||||
from pydub import AudioSegment
|
||||
import tempfile
|
||||
|
||||
from common.extensions import db, minio_client
|
||||
from common.models.document import DocumentVersion, Embedding
|
||||
@@ -29,7 +19,10 @@ from common.utils.celery_utils import current_celery
|
||||
from common.utils.database import Database
|
||||
from common.utils.model_utils import select_model_variables, create_language_template
|
||||
from common.utils.os_utils import safe_remove, sync_folder
|
||||
from eveai_workers.Processors.PDF_Processor import PDFProcessor
|
||||
from eveai_workers.Processors.audio_processor import AudioProcessor
|
||||
from eveai_workers.Processors.html_processor import HTMLProcessor
|
||||
from eveai_workers.Processors.pdf_processor import PDFProcessor
|
||||
from eveai_workers.Processors.srt_processor import SRTProcessor
|
||||
|
||||
|
||||
@current_celery.task(name='create_embeddings', queue='embeddings')
|
||||
@@ -85,8 +78,10 @@ def create_embeddings(tenant_id, document_version_id):
|
||||
process_pdf(tenant, model_variables, document_version)
|
||||
case 'html':
|
||||
process_html(tenant, model_variables, document_version)
|
||||
case 'youtube':
|
||||
process_youtube(tenant, model_variables, document_version)
|
||||
case 'srt':
|
||||
process_srt(tenant, model_variables, document_version)
|
||||
case 'mp4' | 'mp3' | 'ogg':
|
||||
process_audio(tenant, model_variables, document_version)
|
||||
case _:
|
||||
raise Exception(f'No functionality defined for file type {document_version.file_type} '
|
||||
f'for tenant {tenant_id} '
|
||||
@@ -104,83 +99,6 @@ def create_embeddings(tenant_id, document_version_id):
|
||||
raise
|
||||
|
||||
|
||||
# def process_pdf(tenant, model_variables, document_version):
|
||||
# file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
# document_version.id, document_version.file_name)
|
||||
#
|
||||
# pdf_text = ''
|
||||
# pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_data))
|
||||
# for page in pdf_reader.pages:
|
||||
# pdf_text += page.extract_text()
|
||||
#
|
||||
# markdown = generate_markdown_from_pdf(tenant, model_variables, document_version, pdf_text)
|
||||
# markdown_file_name = f'{document_version.id}.md'
|
||||
# minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
# document_version.id,
|
||||
# markdown_file_name, markdown.encode())
|
||||
#
|
||||
# potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||
# chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||
# model_variables['max_chunk_size'])
|
||||
#
|
||||
# if len(chunks) > 1:
|
||||
# summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
|
||||
# document_version.system_context = f'Summary: {summary}\n'
|
||||
# else:
|
||||
# document_version.system_context = ''
|
||||
#
|
||||
# enriched_chunks = enrich_chunks(tenant, document_version, chunks)
|
||||
# embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||
#
|
||||
# try:
|
||||
# db.session.add(document_version)
|
||||
# document_version.processing_finished_at = dt.now(tz.utc)
|
||||
# document_version.processing = False
|
||||
# db.session.add_all(embeddings)
|
||||
# db.session.commit()
|
||||
# except SQLAlchemyError as e:
|
||||
# current_app.logger.error(f'Error saving embedding information for tenant {tenant.id} '
|
||||
# f'on HTML, document version {document_version.id}'
|
||||
# f'error: {e}')
|
||||
# raise
|
||||
#
|
||||
# current_app.logger.info(f'Embeddings created successfully for tenant {tenant.id} '
|
||||
# f'on document version {document_version.id} :-)')
|
||||
|
||||
def process_pdf(tenant, model_variables, document_version):
    """Turn a PDF document version into embeddings and persist them.

    The PDF is first converted by ``PDFProcessor``; the markdown file named
    ``<document_version.id>.md`` is then chunked, the chunks are enriched and
    embedded, and the document version is marked as finished together with
    the new embeddings in a single commit.

    Raises:
        SQLAlchemyError: re-raised after logging when the commit fails.
    """
    processor = PDFProcessor(tenant, model_variables, document_version)
    # Only the title is needed here; the markdown itself is picked up by file
    # name in the chunking step below.
    _markdown, title = processor.process_pdf()

    # Create potential chunks for embedding
    markdown_file_name = f"{document_version.id}.md"
    potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)

    # Combine chunks for embedding
    chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
                                         model_variables['max_chunk_size'])

    # Enrich chunks
    enriched_chunks = enrich_chunks(tenant, document_version, title, chunks)

    # Create embeddings
    embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)

    # Update document version and save embeddings
    try:
        db.session.add(document_version)
        document_version.processing_finished_at = dt.now(tz.utc)
        document_version.processing = False
        db.session.add_all(embeddings)
        db.session.commit()
    except SQLAlchemyError as e:
        # The separator before "error:" keeps the id and the error text from
        # running together in the log line.
        current_app.logger.error(f'Error saving embedding information for tenant {tenant.id} '
                                 f'on PDF, document version {document_version.id}, '
                                 f'error: {e}')
        raise

    current_app.logger.info(f'Embeddings created successfully for tenant {tenant.id} '
                            f'on document version {document_version.id} :-)')
|
||||
|
||||
|
||||
def delete_embeddings_for_document_version(document_version):
|
||||
embeddings_to_delete = db.session.query(Embedding).filter_by(doc_vers_id=document_version.id).all()
|
||||
for embedding in embeddings_to_delete:
|
||||
@@ -193,45 +111,53 @@ def delete_embeddings_for_document_version(document_version):
|
||||
raise
|
||||
|
||||
|
||||
def process_pdf(tenant, model_variables, document_version):
    """Convert a PDF document version to markdown and hand it to ``embed_markdown``."""
    pdf_processor = PDFProcessor(tenant, model_variables, document_version)
    pdf_markdown, pdf_title = pdf_processor.process()

    # Chunking, enrichment, embedding and persistence are shared by all formats.
    embed_markdown(tenant, model_variables, document_version, pdf_markdown, pdf_title)
|
||||
|
||||
|
||||
def process_html(tenant, model_variables, document_version):
|
||||
file_data = minio_client.download_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id, document_version.file_name)
|
||||
html_content = file_data.decode('utf-8')
|
||||
processor = HTMLProcessor(tenant, model_variables, document_version)
|
||||
markdown, title = processor.process()
|
||||
|
||||
# The tags to be considered can be dependent on the tenant
|
||||
html_tags = model_variables['html_tags']
|
||||
html_end_tags = model_variables['html_end_tags']
|
||||
html_included_elements = model_variables['html_included_elements']
|
||||
html_excluded_elements = model_variables['html_excluded_elements']
|
||||
# Process markdown and embed
|
||||
embed_markdown(tenant, model_variables, document_version, markdown, title)
|
||||
|
||||
extracted_html, title = parse_html(tenant, html_content, html_tags, included_elements=html_included_elements,
|
||||
excluded_elements=html_excluded_elements)
|
||||
|
||||
extracted_file_name = f'{document_version.id}-extracted.html'
|
||||
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id,
|
||||
extracted_file_name, extracted_html.encode())
|
||||
def process_audio(tenant, model_variables, document_version):
|
||||
processor = AudioProcessor(tenant, model_variables, document_version)
|
||||
markdown, title = processor.process()
|
||||
|
||||
markdown = generate_markdown_from_html(tenant, model_variables, document_version, extracted_html)
|
||||
markdown_file_name = f'{document_version.id}.md'
|
||||
minio_client.upload_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
document_version.id,
|
||||
markdown_file_name, markdown.encode())
|
||||
# Process markdown and embed
|
||||
embed_markdown(tenant, model_variables, document_version, markdown, title)
|
||||
|
||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||
|
||||
def process_srt(tenant, model_variables, document_version):
    """Convert an SRT document version to markdown and hand it to ``embed_markdown``."""
    srt_processor = SRTProcessor(tenant, model_variables, document_version)
    srt_markdown, srt_title = srt_processor.process()

    # Chunking, enrichment, embedding and persistence are shared by all formats.
    embed_markdown(tenant, model_variables, document_version, srt_markdown, srt_title)
|
||||
|
||||
|
||||
def embed_markdown(tenant, model_variables, document_version, markdown, title):
|
||||
# Create potential chunks
|
||||
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, f"{document_version.id}.md")
|
||||
|
||||
# Combine chunks for embedding
|
||||
chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||
model_variables['max_chunk_size'])
|
||||
|
||||
if len(chunks) > 1:
|
||||
summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
|
||||
document_version.system_context = (f'Title: {title}\n'
|
||||
f'Summary: {summary}\n')
|
||||
else:
|
||||
document_version.system_context = (f'Title: {title}\n')
|
||||
# Enrich chunks
|
||||
enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
|
||||
|
||||
enriched_chunks = enrich_chunks(tenant, document_version, title, chunks)
|
||||
# Create embeddings
|
||||
embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||
|
||||
# Update document version and save embeddings
|
||||
try:
|
||||
db.session.add(document_version)
|
||||
document_version.processing_finished_at = dt.now(tz.utc)
|
||||
@@ -248,12 +174,18 @@ def process_html(tenant, model_variables, document_version):
|
||||
f'on document version {document_version.id} :-)')
|
||||
|
||||
|
||||
def enrich_chunks(tenant, document_version, title, chunks):
|
||||
def enrich_chunks(tenant, model_variables, document_version, title, chunks):
|
||||
current_app.logger.debug(f'Enriching chunks for tenant {tenant.id} '
|
||||
f'on document version {document_version.id}')
|
||||
current_app.logger.debug(f'Nr of chunks: {len(chunks)}')
|
||||
|
||||
summary = ''
|
||||
if len(chunks) > 1:
|
||||
summary = summarize_chunk(tenant, model_variables, document_version, chunks[0])
|
||||
|
||||
chunk_total_context = (f'Filename: {document_version.file_name}\n'
|
||||
f'User Context:\n{document_version.user_context}\n\n'
|
||||
f'Title: {title}\n'
|
||||
f'{summary}\n'
|
||||
f'{document_version.system_context}\n\n')
|
||||
enriched_chunks = []
|
||||
initial_chunk = (f'Filename: {document_version.file_name}\n'
|
||||
@@ -272,95 +204,6 @@ def enrich_chunks(tenant, document_version, title, chunks):
|
||||
return enriched_chunks
|
||||
|
||||
|
||||
def generate_markdown_from_html(tenant, model_variables, document_version, html_content):
    """Convert HTML to markdown by sending size-limited pieces through the LLM.

    The HTML is parsed, split into pieces of roughly 20000 characters, and
    each piece is run through a prompt built from
    ``model_variables['html_parse_template']`` using ``model_variables['llm']``.
    The per-piece results are joined with blank lines and returned as one
    markdown string.
    """
    current_app.logger.debug(f'Generating markdown from HTML for tenant {tenant.id} '
                             f'on document version {document_version.id}')

    llm = model_variables['llm']
    template = model_variables['html_parse_template']
    prompt = ChatPromptTemplate.from_template(template)
    passthrough = RunnablePassthrough()
    to_text = StrOutputParser()
    chain = passthrough | prompt | llm | to_text

    document = BeautifulSoup(html_content, 'lxml')

    def split_content(soup, max_size=20000):
        """Group the selected elements into HTML strings of about ``max_size`` characters."""
        finished = []
        pending = []
        pending_size = 0

        def flush():
            # Serialise the elements gathered so far and start a new group.
            nonlocal pending, pending_size
            finished.append(''.join(str(item) for item in pending))
            pending = []
            pending_size = 0

        # NOTE(review): find_all is recursive, so an element nested inside a
        # selected <div> appears to be emitted both as part of the <div> and
        # on its own - confirm whether that duplication is intended.
        for node in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'span', 'table']):
            node_size = len(str(node))

            # Close the current group before it would grow past the limit.
            if pending and pending_size + node_size > max_size:
                flush()

            pending.append(node)
            pending_size += node_size

            # A major heading that alone exceeds the limit is emitted immediately.
            if node.name in ('h1', 'h2', 'h3') and pending_size > max_size:
                flush()

        if pending:
            flush()

        return finished

    html_pieces = split_content(document)

    markdown_pieces = []

    for piece in html_pieces:
        current_app.logger.debug(f'Processing chunk to generate markdown from HTML for tenant {tenant.id} '
                                 f'on document version {document_version.id}')
        if tenant.embed_tuning:
            current_app.embed_tuning_logger.debug(f'Processing chunk: \n '
                                                  f'------------------\n'
                                                  f'{piece}\n'
                                                  f'------------------\n')
        piece_markdown = chain.invoke({"html": piece})
        markdown_pieces.append(piece_markdown)
        if tenant.embed_tuning:
            current_app.embed_tuning_logger.debug(f'Processed markdown chunk: \n '
                                                  f'-------------------------\n'
                                                  f'{piece_markdown}\n'
                                                  f'-------------------------\n')
        current_app.logger.debug(f'Finished processing chunk to generate markdown from HTML for tenant {tenant.id} '
                                 f'on document version {document_version.id}')

    # Combine all markdown chunks
    combined_markdown = "\n\n".join(markdown_pieces)

    current_app.logger.debug(f'Finished generating markdown from HTML for tenant {tenant.id} '
                             f'on document version {document_version.id}')

    return combined_markdown
|
||||
|
||||
|
||||
def generate_markdown_from_pdf(tenant, model_variables, document_version, pdf_content):
    """Ask the LLM to turn extracted PDF text into markdown and return the result.

    The prompt comes from ``model_variables['pdf_parse_template']`` and is
    filled with ``pdf_content`` under the ``pdf_content`` key.
    """
    current_app.logger.debug(f'Generating Markdown from PDF for tenant {tenant.id} '
                             f'on document version {document_version.id}')
    llm = model_variables['llm']
    template = model_variables['pdf_parse_template']
    prompt = ChatPromptTemplate.from_template(template)
    passthrough = RunnablePassthrough()
    to_text = StrOutputParser()

    pdf_chain = passthrough | prompt | llm | to_text

    return pdf_chain.invoke({"pdf_content": pdf_content})
|
||||
|
||||
|
||||
def summarize_chunk(tenant, model_variables, document_version, chunk):
|
||||
current_app.logger.debug(f'Summarizing chunk for tenant {tenant.id} '
|
||||
f'on document version {document_version.id}')
|
||||
@@ -415,65 +258,6 @@ def embed_chunks(tenant, model_variables, document_version, chunks):
|
||||
return new_embeddings
|
||||
|
||||
|
||||
def parse_html(tenant, html_content, tags, included_elements=None, excluded_elements=None):
    """Extract the wanted tags from an HTML page and return them with the page title.

    Only elements matching ``tags`` are kept. When ``included_elements`` is
    given, the search is limited to those elements; otherwise the whole
    document is searched. Elements rejected by ``should_exclude_element`` are
    skipped.

    Returns:
        A tuple ``(extracted_html, title)``; ``title`` is an empty string when
        the page has no ``<title>`` element.
    """
    current_app.logger.debug(f'Parsing HTML for tenant {tenant.id}')
    soup = BeautifulSoup(html_content, 'html.parser')
    excluded_classes = parse_excluded_classes(tenant.html_excluded_classes)

    # Either restrict parsing to the included elements or use the whole page.
    roots = soup.find_all(included_elements) if included_elements else [soup]

    log_parsing_info(tenant, tags, included_elements, excluded_elements, excluded_classes, roots)

    fragments = []
    for root in roots:
        for candidate in root.find_all(tags):
            if not should_exclude_element(candidate, excluded_elements, excluded_classes):
                fragments.append(extract_element_content(candidate))
    extracted_html = ''.join(fragments)

    title_tag = soup.find('title')
    title = title_tag.get_text(strip=True) if title_tag else ''

    current_app.logger.debug(f'Finished parsing HTML for tenant {tenant.id}')

    return extracted_html, title
|
||||
|
||||
|
||||
def parse_excluded_classes(excluded_classes):
    """Parse ``element.class`` exclusion rules into a lookup table.

    Args:
        excluded_classes: iterable of rule strings such as ``"div.footer"`` or
            ``"*.hidden"``; ``None`` is treated as "no rules".

    Returns:
        A dict mapping each element name (or ``"*"``) to the set of class
        names excluded for it. Only the first ``.`` separates element from
        class, so ``"div.a.b"`` yields class ``"a.b"``.

    Blank rules are skipped. A rule without a ``.`` is treated as a class
    name that applies to every element (stored under ``"*"``) instead of
    raising ``ValueError`` while unpacking.
    """
    parsed = {}
    for rule in excluded_classes or ():
        rule = rule.strip()
        if not rule:
            continue
        element, separator, cls = rule.partition('.')
        if not separator:
            # No element part given: apply the class to any element.
            element, cls = '*', rule
        parsed.setdefault(element, set()).add(cls)
    return parsed
|
||||
|
||||
|
||||
def should_exclude_element(element, excluded_elements, excluded_classes):
    """Tell whether ``element`` must be left out of the extracted HTML.

    An element is excluded when it sits inside one of ``excluded_elements``
    or when the class-based rules in ``excluded_classes`` reject it.
    """
    has_excluded_ancestor = bool(excluded_elements) and bool(element.find_parent(excluded_elements))
    if not has_excluded_ancestor:
        return is_element_excluded_by_class(element, excluded_classes)
    return True
|
||||
|
||||
|
||||
def is_element_excluded_by_class(element, excluded_classes):
    """Return whether ``element`` or any of its ancestors matches a class exclusion rule."""
    # Ancestors are checked first; the element itself is the last resort.
    if any(element_matches_exclusion(ancestor, excluded_classes) for ancestor in element.parents):
        return True
    return element_matches_exclusion(element, excluded_classes)
|
||||
|
||||
|
||||
def element_matches_exclusion(element, excluded_classes):
    """Return whether one of the element's classes is excluded for it.

    A class is excluded when it is listed under the wildcard key ``'*'`` or
    under the element's own tag name in ``excluded_classes``.
    """

    def has_excluded_class(selector):
        # Look the selector up lazily so the element's classes are only read
        # when rules exist for that selector.
        if selector not in excluded_classes:
            return False
        banned = excluded_classes[selector]
        return any(css_class in banned for css_class in element.get('class', []))

    return has_excluded_class('*') or has_excluded_class(element.name)
|
||||
|
||||
|
||||
def extract_element_content(element):
    """Render an element as ``<name>text</name>`` followed by a newline.

    The text is the element's stripped strings joined by single spaces;
    attributes and nested markup are dropped.
    """
    pieces = [piece.strip() for piece in element.stripped_strings]
    text = ' '.join(pieces)
    tag = element.name
    return f'<{tag}>{text}</{tag}>\n'
|
||||
|
||||
|
||||
def log_parsing_info(tenant, tags, included_elements, excluded_elements, excluded_classes, elements_to_parse):
|
||||
if tenant.embed_tuning:
|
||||
current_app.embed_tuning_logger.debug(f'Tags to parse: {tags}')
|
||||
@@ -484,244 +268,242 @@ def log_parsing_info(tenant, tags, included_elements, excluded_elements, exclude
|
||||
current_app.embed_tuning_logger.debug(f'First element to parse: {elements_to_parse[0]}')
|
||||
|
||||
|
||||
def process_youtube(tenant, model_variables, document_version):
    """Download a YouTube video, transcribe it and store embeddings for it.

    Steps: remove files left by an earlier run, download the audio, compress
    it, transcribe it, annotate the transcription into markdown, then chunk,
    enrich and embed the markdown and commit the result.

    Raises:
        SQLAlchemyError: re-raised after logging when the commit fails.
        Exception: anything raised by the download/compress/transcribe/
            annotate helpers propagates unchanged.
    """
    download_file_name = f'{document_version.id}.mp4'
    compressed_file_name = f'{document_version.id}.mp3'
    transcription_file_name = f'{document_version.id}.txt'
    markdown_file_name = f'{document_version.id}.md'

    # Remove existing files (in case of a re-processing of the file)
    for stale_file_name in (download_file_name, compressed_file_name,
                            transcription_file_name, markdown_file_name):
        minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
                                          document_version.id, stale_file_name)

    _file_name, title, description, author = download_youtube(document_version.url, tenant.id, document_version,
                                                              download_file_name)
    document_version.system_context = f'Title: {title}\nDescription: {description}\nAuthor: {author}'
    compress_audio(tenant.id, document_version, download_file_name, compressed_file_name)
    transcribe_audio(tenant.id, document_version, compressed_file_name, transcription_file_name, model_variables)
    annotate_transcription(tenant, document_version, transcription_file_name, markdown_file_name, model_variables)

    potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
    actual_chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
                                                model_variables['max_chunk_size'])

    # enrich_chunks needs the title; the three-argument call used before did
    # not match its signature.
    # NOTE(review): this follows the (tenant, document_version, title, chunks)
    # form used by the PDF path - adjust if enrich_chunks also takes
    # model_variables in this revision.
    enriched_chunks = enrich_chunks(tenant, document_version, title, actual_chunks)
    embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)

    try:
        db.session.add(document_version)
        document_version.processing_finished_at = dt.now(tz.utc)
        document_version.processing = False
        db.session.add_all(embeddings)
        db.session.commit()
    except SQLAlchemyError as e:
        current_app.logger.error(f'Error saving embedding information for tenant {tenant.id} '
                                 f'on Youtube document version {document_version.id}, '
                                 f'error: {e}')
        raise

    current_app.logger.info(f'Embeddings created successfully for tenant {tenant.id} '
                            f'on Youtube document version {document_version.id} :-)')
|
||||
|
||||
|
||||
def download_youtube(url, tenant_id, document_version, file_name):
    """Download the audio-only stream of a YouTube video and store it in MinIO.

    Args:
        url: address of the video.
        tenant_id: tenant the document belongs to.
        document_version: document version the file is stored under.
        file_name: object name to store the download as.

    Returns:
        A tuple ``(file_name, title, description, author)``.

    Raises:
        Exception: any failure is logged and re-raised unchanged.
    """
    try:
        current_app.logger.info(f'Downloading YouTube video: {url} for tenant: {tenant_id}')
        yt = YouTube(url)
        stream = yt.streams.get_audio_only()

        # output_path is a directory, so download into a throw-away directory
        # and read back the file that download() reports. The directory and
        # its content are removed when the block exits.
        # NOTE(review): relies on Stream.download returning the path of the
        # saved file, as documented for pytube - confirm for the library in use.
        with tempfile.TemporaryDirectory() as temp_dir:
            downloaded_path = stream.download(output_path=temp_dir)
            with open(downloaded_path, 'rb') as f:
                file_data = f.read()

        minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language,
                                          document_version.id,
                                          file_name, file_data)

        current_app.logger.info(f'Downloaded YouTube video: {url} for tenant: {tenant_id}')
        return file_name, yt.title, yt.description, yt.author
    except Exception as e:
        current_app.logger.error(f'Error downloading YouTube video: {url} for tenant: {tenant_id} with error: {e}')
        raise
|
||||
|
||||
|
||||
def compress_audio(tenant_id, document_version, input_file, output_file):
    """Re-encode a stored audio/video file to 64k MP3 with ffmpeg and store the result.

    Args:
        tenant_id: tenant the document belongs to.
        document_version: document version the files are stored under.
        input_file: object name of the file to compress.
        output_file: object name to store the compressed MP3 as.

    Raises:
        Exception: when ffmpeg exits with a non-zero status, or on any other
            failure; every failure is logged and re-raised.
    """
    try:
        current_app.logger.info(f'Compressing audio for tenant: {tenant_id}')

        input_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
                                                         document_version.id, input_file)

        # Work inside a throw-away directory: both files are cleaned up on
        # exit, and the output path does not exist yet, so ffmpeg never has
        # to ask whether to overwrite it.
        with tempfile.TemporaryDirectory() as temp_dir:
            input_path = os.path.join(temp_dir, 'input.mp4')
            output_path = os.path.join(temp_dir, 'output.mp3')

            with open(input_path, 'wb') as f:
                f.write(input_data)

            result = subprocess.run(
                ['ffmpeg', '-i', input_path, '-b:a', '64k', '-f', 'mp3', output_path],
                capture_output=True,
                text=True
            )

            if result.returncode != 0:
                raise Exception(f"Compression failed: {result.stderr}")

            with open(output_path, 'rb') as f:
                compressed_data = f.read()

        minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language,
                                          document_version.id,
                                          output_file, compressed_data)

        current_app.logger.info(f'Compressed audio for tenant: {tenant_id}')
    except Exception as e:
        current_app.logger.error(f'Error compressing audio for tenant: {tenant_id} with error: {e}')
        raise
|
||||
|
||||
|
||||
def transcribe_audio(tenant_id, document_version, input_file, output_file, model_variables):
    """Transcribe a stored MP3 in 10-minute segments and store the full text.

    Args:
        tenant_id: tenant the document belongs to.
        document_version: document version the files are stored under; its
            ``language`` is passed to the transcription call.
        input_file: object name of the MP3 to transcribe.
        output_file: object name to store the UTF-8 transcription as.
        model_variables: provides ``transcription_client`` and
            ``transcription_model``.

    Raises:
        Exception: any failure is logged and re-raised unchanged.
    """
    try:
        current_app.logger.info(f'Transcribing audio for tenant: {tenant_id}')
        client = model_variables['transcription_client']
        model = model_variables['transcription_model']

        # Download the audio file from MinIO
        audio_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
                                                         document_version.id, input_file)

        # Load the audio data into pydub
        audio = AudioSegment.from_mp3(io.BytesIO(audio_data))

        # Define segment length (e.g., 10 minutes)
        segment_length = 10 * 60 * 1000  # 10 minutes in milliseconds

        # Ceiling division, so an exact multiple is not reported as one
        # segment too many.
        total_segments = max(1, -(-len(audio) // segment_length))

        transcriptions = []

        # Split audio into segments and transcribe each
        for i, chunk in enumerate(audio[::segment_length]):
            current_app.logger.debug(f'Transcribing chunk {i + 1} of {total_segments}')

            # Reserve a named file and close it right away so it can be
            # written by export() and reopened for reading.
            temp_audio = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
            temp_audio.close()
            try:
                chunk.export(temp_audio.name, format="mp3")

                with open(temp_audio.name, 'rb') as audio_segment:
                    transcription = client.audio.transcriptions.create(
                        file=audio_segment,
                        model=model,
                        language=document_version.language,
                        response_format='verbose_json',
                    )

                transcriptions.append(transcription.text)
            finally:
                # Delete the temporary file even when export or transcription fails
                os.unlink(temp_audio.name)

        # Combine all transcriptions
        full_transcription = " ".join(transcriptions)

        # Upload the full transcription to MinIO
        minio_client.upload_document_file(
            tenant_id,
            document_version.doc_id,
            document_version.language,
            document_version.id,
            output_file,
            full_transcription.encode('utf-8')
        )

        current_app.logger.info(f'Transcribed audio for tenant: {tenant_id}')
    except Exception as e:
        current_app.logger.error(f'Error transcribing audio for tenant: {tenant_id}, with error: {e}')
        raise
|
||||
|
||||
|
||||
def annotate_transcription(tenant, document_version, input_file, output_file, model_variables):
    """Turn a stored plain-text transcription into markdown and store the result.

    The transcription is split on ``.`` into parts of about
    ``model_variables['annotation_chunk_length']`` characters. Each part is
    sent to the LLM, prefixed with text carried over from the previous
    answer, and the answer is split on level 1 and 2 markdown headers. All
    sections except the last one of each answer are collected; the final
    carry-over is appended at the end and everything is joined with newlines.

    Raises:
        Exception: any failure is logged and re-raised unchanged.
    """
    try:
        current_app.logger.debug(f'Annotating transcription for tenant {tenant.id}')

        sentence_splitter = CharacterTextSplitter(separator='.',
                                                  chunk_size=model_variables['annotation_chunk_length'],
                                                  chunk_overlap=0)

        header_splitter = MarkdownHeaderTextSplitter([("#", "Header 1"), ("##", "Header 2")],
                                                     strip_headers=False)

        llm = model_variables['llm']
        template = model_variables['transcript_template']
        prompt = ChatPromptTemplate.from_template(create_language_template(template, document_version.language))

        # Download the transcription file from MinIO
        raw_transcript = minio_client.download_document_file(tenant.id, document_version.doc_id,
                                                             document_version.language, document_version.id,
                                                             input_file)
        transcript = raw_transcript.decode('utf-8')

        chain = RunnablePassthrough() | prompt | llm | StrOutputParser()

        transcript_parts = sentence_splitter.split_text(transcript)
        finished_sections = []
        carry_over = ''
        for part in transcript_parts:
            current_app.logger.debug(f'Annotating next chunk of {len(transcript_parts)} for tenant {tenant.id}')
            llm_input = carry_over + '\n' + part
            if tenant.embed_tuning:
                current_app.embed_tuning_logger.debug(f'Annotating chunk: \n '
                                                      f'------------------\n'
                                                      f'{llm_input}\n'
                                                      f'------------------\n')
            markdown = chain.invoke({'transcript': llm_input})
            # GPT-4o returns some kind of content description: ```markdown <text> ```
            if markdown.startswith("```markdown"):
                markdown = "\n".join(markdown.strip().split("\n")[1:-1])
            if tenant.embed_tuning:
                current_app.embed_tuning_logger.debug(f'Markdown Received: \n '
                                                      f'------------------\n'
                                                      f'{markdown}\n'
                                                      f'------------------\n')
            sections = [doc.page_content for doc in header_splitter.split_text(markdown)]
            # claude-3.5-sonnet returns introductory text
            if not sections[0].startswith('#'):
                sections.pop(0)
            # NOTE(review): the first assignment is immediately overwritten,
            # so the carry-over is the whole answer minus its first line
            # rather than the last section - confirm which one is intended.
            carry_over = sections[-1]
            carry_over = "\n".join(markdown.strip().split("\n")[1:])
            sections.pop()
            finished_sections.extend(sections)

        finished_sections.append(carry_over)

        annotated_transcript = '\n'.join(finished_sections)

        # Upload the annotated transcript to MinIO
        minio_client.upload_document_file(
            tenant.id,
            document_version.doc_id,
            document_version.language,
            document_version.id,
            output_file,
            annotated_transcript.encode('utf-8')
        )

        current_app.logger.info(f'Annotated transcription for tenant {tenant.id}')
    except Exception as e:
        current_app.logger.error(f'Error annotating transcription for tenant {tenant.id}, with error: {e}')
        raise
|
||||
# def process_youtube(tenant, model_variables, document_version):
|
||||
# download_file_name = f'{document_version.id}.mp4'
|
||||
# compressed_file_name = f'{document_version.id}.mp3'
|
||||
# transcription_file_name = f'{document_version.id}.txt'
|
||||
# markdown_file_name = f'{document_version.id}.md'
|
||||
#
|
||||
# # Remove existing files (in case of a re-processing of the file
|
||||
# minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
# document_version.id, download_file_name)
|
||||
# minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
# document_version.id, compressed_file_name)
|
||||
# minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
# document_version.id, transcription_file_name)
|
||||
# minio_client.delete_document_file(tenant.id, document_version.doc_id, document_version.language,
|
||||
# document_version.id, markdown_file_name)
|
||||
#
|
||||
# of, title, description, author = download_youtube(document_version.url, tenant.id, document_version,
|
||||
# download_file_name)
|
||||
# document_version.system_context = f'Title: {title}\nDescription: {description}\nAuthor: {author}'
|
||||
# compress_audio(tenant.id, document_version, download_file_name, compressed_file_name)
|
||||
# transcribe_audio(tenant.id, document_version, compressed_file_name, transcription_file_name, model_variables)
|
||||
# annotate_transcription(tenant, document_version, transcription_file_name, markdown_file_name, model_variables)
|
||||
#
|
||||
# potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, markdown_file_name)
|
||||
# actual_chunks = combine_chunks_for_markdown(potential_chunks, model_variables['min_chunk_size'],
|
||||
# model_variables['max_chunk_size'])
|
||||
#
|
||||
# enriched_chunks = enrich_chunks(tenant, document_version, actual_chunks)
|
||||
# embeddings = embed_chunks(tenant, model_variables, document_version, enriched_chunks)
|
||||
#
|
||||
# try:
|
||||
# db.session.add(document_version)
|
||||
# document_version.processing_finished_at = dt.now(tz.utc)
|
||||
# document_version.processing = False
|
||||
# db.session.add_all(embeddings)
|
||||
# db.session.commit()
|
||||
# except SQLAlchemyError as e:
|
||||
# current_app.logger.error(f'Error saving embedding information for tenant {tenant.id} '
|
||||
# f'on Youtube document version {document_version.id}'
|
||||
# f'error: {e}')
|
||||
# raise
|
||||
#
|
||||
# current_app.logger.info(f'Embeddings created successfully for tenant {tenant.id} '
|
||||
# f'on Youtube document version {document_version.id} :-)')
|
||||
#
|
||||
#
|
||||
# def download_youtube(url, tenant_id, document_version, file_name):
|
||||
# try:
|
||||
# current_app.logger.info(f'Downloading YouTube video: {url} for tenant: {tenant_id}')
|
||||
# yt = YouTube(url)
|
||||
# stream = yt.streams.get_audio_only()
|
||||
#
|
||||
# with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
||||
# stream.download(output_path=temp_file.name)
|
||||
# with open(temp_file.name, 'rb') as f:
|
||||
# file_data = f.read()
|
||||
#
|
||||
# minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||
# document_version.id,
|
||||
# file_name, file_data)
|
||||
#
|
||||
# current_app.logger.info(f'Downloaded YouTube video: {url} for tenant: {tenant_id}')
|
||||
# return file_name, yt.title, yt.description, yt.author
|
||||
# except Exception as e:
|
||||
# current_app.logger.error(f'Error downloading YouTube video: {url} for tenant: {tenant_id} with error: {e}')
|
||||
# raise
|
||||
#
|
||||
#
|
||||
# def compress_audio(tenant_id, document_version, input_file, output_file):
|
||||
# try:
|
||||
# current_app.logger.info(f'Compressing audio for tenant: {tenant_id}')
|
||||
#
|
||||
# input_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||
# document_version.id, input_file)
|
||||
#
|
||||
# with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_input:
|
||||
# temp_input.write(input_data)
|
||||
# temp_input.flush()
|
||||
#
|
||||
# with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_output:
|
||||
# result = subprocess.run(
|
||||
# ['ffmpeg', '-i', temp_input.name, '-b:a', '64k', '-f', 'mp3', temp_output.name],
|
||||
# capture_output=True,
|
||||
# text=True
|
||||
# )
|
||||
#
|
||||
# if result.returncode != 0:
|
||||
# raise Exception(f"Compression failed: {result.stderr}")
|
||||
#
|
||||
# with open(temp_output.name, 'rb') as f:
|
||||
# compressed_data = f.read()
|
||||
#
|
||||
# minio_client.upload_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||
# document_version.id,
|
||||
# output_file, compressed_data)
|
||||
#
|
||||
# current_app.logger.info(f'Compressed audio for tenant: {tenant_id}')
|
||||
# except Exception as e:
|
||||
# current_app.logger.error(f'Error compressing audio for tenant: {tenant_id} with error: {e}')
|
||||
# raise
|
||||
#
|
||||
#
|
||||
# def transcribe_audio(tenant_id, document_version, input_file, output_file, model_variables):
|
||||
# try:
|
||||
# current_app.logger.info(f'Transcribing audio for tenant: {tenant_id}')
|
||||
# client = model_variables['transcription_client']
|
||||
# model = model_variables['transcription_model']
|
||||
#
|
||||
# # Download the audio file from MinIO
|
||||
# audio_data = minio_client.download_document_file(tenant_id, document_version.doc_id, document_version.language,
|
||||
# document_version.id, input_file)
|
||||
#
|
||||
# # Load the audio data into pydub
|
||||
# audio = AudioSegment.from_mp3(io.BytesIO(audio_data))
|
||||
#
|
||||
# # Define segment length (e.g., 10 minutes)
|
||||
# segment_length = 10 * 60 * 1000 # 10 minutes in milliseconds
|
||||
#
|
||||
# transcriptions = []
|
||||
#
|
||||
# # Split audio into segments and transcribe each
|
||||
# for i, chunk in enumerate(audio[::segment_length]):
|
||||
# current_app.logger.debug(f'Transcribing chunk {i + 1} of {len(audio) // segment_length + 1}')
|
||||
#
|
||||
# with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
|
||||
# chunk.export(temp_audio.name, format="mp3")
|
||||
#
|
||||
# with open(temp_audio.name, 'rb') as audio_segment:
|
||||
# transcription = client.audio.transcriptions.create(
|
||||
# file=audio_segment,
|
||||
# model=model,
|
||||
# language=document_version.language,
|
||||
# response_format='verbose_json',
|
||||
# )
|
||||
#
|
||||
# transcriptions.append(transcription.text)
|
||||
#
|
||||
# os.unlink(temp_audio.name) # Delete the temporary file
|
||||
#
|
||||
# # Combine all transcriptions
|
||||
# full_transcription = " ".join(transcriptions)
|
||||
#
|
||||
# # Upload the full transcription to MinIO
|
||||
# minio_client.upload_document_file(
|
||||
# tenant_id,
|
||||
# document_version.doc_id,
|
||||
# document_version.language,
|
||||
# document_version.id,
|
||||
# output_file,
|
||||
# full_transcription.encode('utf-8')
|
||||
# )
|
||||
#
|
||||
# current_app.logger.info(f'Transcribed audio for tenant: {tenant_id}')
|
||||
# except Exception as e:
|
||||
# current_app.logger.error(f'Error transcribing audio for tenant: {tenant_id}, with error: {e}')
|
||||
# raise
|
||||
#
|
||||
#
|
||||
# def annotate_transcription(tenant, document_version, input_file, output_file, model_variables):
|
||||
# try:
|
||||
# current_app.logger.debug(f'Annotating transcription for tenant {tenant.id}')
|
||||
#
|
||||
# char_splitter = CharacterTextSplitter(separator='.',
|
||||
# chunk_size=model_variables['annotation_chunk_length'],
|
||||
# chunk_overlap=0)
|
||||
#
|
||||
# headers_to_split_on = [
|
||||
# ("#", "Header 1"),
|
||||
# ("##", "Header 2"),
|
||||
# ]
|
||||
# markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)
|
||||
#
|
||||
# llm = model_variables['llm']
|
||||
# template = model_variables['transcript_template']
|
||||
# language_template = create_language_template(template, document_version.language)
|
||||
# transcript_prompt = ChatPromptTemplate.from_template(language_template)
|
||||
# setup = RunnablePassthrough()
|
||||
# output_parser = StrOutputParser()
|
||||
#
|
||||
# # Download the transcription file from MinIO
|
||||
# transcript_data = minio_client.download_document_file(tenant.id, document_version.doc_id,
|
||||
# document_version.language, document_version.id,
|
||||
# input_file)
|
||||
# transcript = transcript_data.decode('utf-8')
|
||||
#
|
||||
# chain = setup | transcript_prompt | llm | output_parser
|
||||
#
|
||||
# chunks = char_splitter.split_text(transcript)
|
||||
# all_markdown_chunks = []
|
||||
# last_markdown_chunk = ''
|
||||
# for chunk in chunks:
|
||||
# current_app.logger.debug(f'Annotating next chunk of {len(chunks)} for tenant {tenant.id}')
|
||||
# full_input = last_markdown_chunk + '\n' + chunk
|
||||
# if tenant.embed_tuning:
|
||||
# current_app.embed_tuning_logger.debug(f'Annotating chunk: \n '
|
||||
# f'------------------\n'
|
||||
# f'{full_input}\n'
|
||||
# f'------------------\n')
|
||||
# input_transcript = {'transcript': full_input}
|
||||
# markdown = chain.invoke(input_transcript)
|
||||
# # GPT-4o returns some kind of content description: ```markdown <text> ```
|
||||
# if markdown.startswith("```markdown"):
|
||||
# markdown = "\n".join(markdown.strip().split("\n")[1:-1])
|
||||
# if tenant.embed_tuning:
|
||||
# current_app.embed_tuning_logger.debug(f'Markdown Received: \n '
|
||||
# f'------------------\n'
|
||||
# f'{markdown}\n'
|
||||
# f'------------------\n')
|
||||
# md_header_splits = markdown_splitter.split_text(markdown)
|
||||
# markdown_chunks = [doc.page_content for doc in md_header_splits]
|
||||
# # claude-3.5-sonnet returns introductory text
|
||||
# if not markdown_chunks[0].startswith('#'):
|
||||
# markdown_chunks.pop(0)
|
||||
# last_markdown_chunk = markdown_chunks[-1]
|
||||
# last_markdown_chunk = "\n".join(markdown.strip().split("\n")[1:])
|
||||
# markdown_chunks.pop()
|
||||
# all_markdown_chunks += markdown_chunks
|
||||
#
|
||||
# all_markdown_chunks += [last_markdown_chunk]
|
||||
#
|
||||
# annotated_transcript = '\n'.join(all_markdown_chunks)
|
||||
#
|
||||
# # Upload the annotated transcript to MinIO
|
||||
# minio_client.upload_document_file(
|
||||
# tenant.id,
|
||||
# document_version.doc_id,
|
||||
# document_version.language,
|
||||
# document_version.id,
|
||||
# output_file,
|
||||
# annotated_transcript.encode('utf-8')
|
||||
# )
|
||||
#
|
||||
# current_app.logger.info(f'Annotated transcription for tenant {tenant.id}')
|
||||
# except Exception as e:
|
||||
# current_app.logger.error(f'Error annotating transcription for tenant {tenant.id}, with error: {e}')
|
||||
# raise
|
||||
|
||||
|
||||
def create_potential_chunks_for_markdown(tenant_id, document_version, input_file):
|
||||
@@ -779,4 +561,3 @@ def combine_chunks_for_markdown(potential_chunks, min_chars, max_chars):
|
||||
actual_chunks.append(current_chunk)
|
||||
|
||||
return actual_chunks
|
||||
|
||||
|
||||
Binary file not shown.
@@ -3,7 +3,7 @@
|
||||
Plugin Name: EveAI Chat Widget
|
||||
Plugin URI: https://askeveai.com/
|
||||
Description: Integrates the EveAI chat interface into your WordPress site.
|
||||
Version: 1.4.1
|
||||
Version: 1.5.0
|
||||
Author: Josako, Pieter Laroy
|
||||
Author URI: https://askeveai.com/about/
|
||||
*/
|
||||
@@ -28,7 +28,8 @@ function eveai_chat_shortcode($atts) {
|
||||
'api_key' => '',
|
||||
'domain' => '',
|
||||
'language' => 'en',
|
||||
'supported_languages' => 'en,fr,de,es'
|
||||
'supported_languages' => 'en,fr,de,es',
|
||||
'server_url' => 'https://evie.askeveai.com'
|
||||
);
|
||||
|
||||
// Merge provided attributes with defaults
|
||||
@@ -40,6 +41,7 @@ function eveai_chat_shortcode($atts) {
|
||||
$domain = esc_url_raw($atts['domain']);
|
||||
$language = sanitize_text_field($atts['language']);
|
||||
$supported_languages = sanitize_text_field($atts['supported_languages']);
|
||||
$server_url = esc_url_raw($atts['server_url']);
|
||||
|
||||
// Generate a unique ID for this instance of the chat widget
|
||||
$chat_id = 'chat-container-' . uniqid();
|
||||
@@ -52,7 +54,8 @@ function eveai_chat_shortcode($atts) {
|
||||
'$api_key',
|
||||
'$domain',
|
||||
'$language',
|
||||
'$supported_languages'
|
||||
'$supported_languages',
|
||||
'$server_url'
|
||||
);
|
||||
eveAI.initializeChat('$chat_id');
|
||||
});
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
class EveAIChatWidget extends HTMLElement {
|
||||
static get observedAttributes() {
|
||||
return ['tenant-id', 'api-key', 'domain', 'language', 'languages'];
|
||||
return ['tenant-id', 'api-key', 'domain', 'language', 'languages', 'server-url'];
|
||||
}
|
||||
|
||||
constructor() {
|
||||
@@ -87,6 +87,7 @@ class EveAIChatWidget extends HTMLElement {
|
||||
this.language = this.getAttribute('language');
|
||||
const languageAttr = this.getAttribute('languages');
|
||||
this.languages = languageAttr ? languageAttr.split(',') : [];
|
||||
this.serverUrl = this.getAttribute('server-url');
|
||||
this.currentLanguage = this.language;
|
||||
console.log('Updated attributes:', {
|
||||
tenantId: this.tenantId,
|
||||
@@ -94,7 +95,8 @@ class EveAIChatWidget extends HTMLElement {
|
||||
domain: this.domain,
|
||||
language: this.language,
|
||||
currentLanguage: this.currentLanguage,
|
||||
languages: this.languages
|
||||
languages: this.languages,
|
||||
serverUrl: this.serverUrl
|
||||
});
|
||||
}
|
||||
|
||||
@@ -104,14 +106,16 @@ class EveAIChatWidget extends HTMLElement {
|
||||
const domain = this.getAttribute('domain');
|
||||
const language = this.getAttribute('language');
|
||||
const languages = this.getAttribute('languages');
|
||||
const serverUrl = this.getAttribute('server-url');
|
||||
console.log('Checking if all attributes are set:', {
|
||||
tenantId,
|
||||
apiKey,
|
||||
domain,
|
||||
language,
|
||||
languages
|
||||
languages,
|
||||
serverUrl
|
||||
});
|
||||
return tenantId && apiKey && domain && language && languages;
|
||||
return tenantId && apiKey && domain && language && languages && serverUrl;
|
||||
}
|
||||
|
||||
createLanguageDropdown() {
|
||||
@@ -142,7 +146,7 @@ class EveAIChatWidget extends HTMLElement {
|
||||
console.log(`Initializing socket connection to Evie`);
|
||||
|
||||
// Ensure apiKey is passed in the query parameters
|
||||
this.socket = io('https://evie.askeveai.com', {
|
||||
this.socket = io(this.serverUrl, {
|
||||
path: '/chat/socket.io/',
|
||||
transports: ['websocket', 'polling'],
|
||||
query: {
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
// static/js/eveai-sdk.js
|
||||
class EveAI {
|
||||
constructor(tenantId, apiKey, domain, language, languages) {
|
||||
constructor(tenantId, apiKey, domain, language, languages, serverUrl) {
|
||||
this.tenantId = tenantId;
|
||||
this.apiKey = apiKey;
|
||||
this.domain = domain;
|
||||
this.language = language;
|
||||
this.languages = languages;
|
||||
this.serverUrl = serverUrl;
|
||||
|
||||
console.log('EveAI constructor:', { tenantId, apiKey, domain, language, languages });
|
||||
console.log('EveAI constructor:', { tenantId, apiKey, domain, language, languages, serverUrl });
|
||||
}
|
||||
|
||||
initializeChat(containerId) {
|
||||
@@ -21,6 +22,7 @@ class EveAI {
|
||||
chatWidget.setAttribute('domain', this.domain);
|
||||
chatWidget.setAttribute('language', this.language);
|
||||
chatWidget.setAttribute('languages', this.languages);
|
||||
chatWidget.setAttribute('server-url', this.serverUrl);
|
||||
});
|
||||
} else {
|
||||
console.error('Container not found');
|
||||
|
||||
@@ -3,7 +3,7 @@ Contributors: Josako
|
||||
Tags: chat, ai
|
||||
Requires at least: 5.0
|
||||
Tested up to: 5.9
|
||||
Stable tag: 1.4.1
|
||||
Stable tag: 1.5.0
|
||||
License: GPLv2 or later
|
||||
License URI: http://www.gnu.org/licenses/gpl-2.0.html
|
||||
|
||||
@@ -23,10 +23,10 @@ This plugin allows you to easily add the EveAI chat widget to your WordPress sit
|
||||
|
||||
To add an EveAI Chat Widget to your page or post, use the following shortcode:
|
||||
|
||||
[eveai_chat tenant_id="YOUR_TENANT_ID" api_key="YOUR_API_KEY" domain="YOUR_DOMAIN" language="LANGUAGE_CODE" supported_languages="COMMA_SEPARATED_LANGUAGE_CODES"]
|
||||
[eveai_chat tenant_id="YOUR_TENANT_ID" api_key="YOUR_API_KEY" domain="YOUR_DOMAIN" language="LANGUAGE_CODE" supported_languages="COMMA_SEPARATED_LANGUAGE_CODES" server_url="Server URL for Evie"]
|
||||
|
||||
Example:
|
||||
[eveai_chat tenant_id="123456" api_key="your_api_key_here" domain="https://your-domain.com" language="en" supported_languages="en,fr,de,es"]
|
||||
[eveai_chat tenant_id="123456" api_key="your_api_key_here" domain="https://your-domain.com" language="en" supported_languages="en,fr,de,es" server_url="https://evie.askeveai.com"]
|
||||
|
||||
You can add multiple chat widgets with different configurations by using the shortcode multiple times with different parameters.
|
||||
|
||||
@@ -38,6 +38,9 @@ Contact your EveAI service provider to obtain your Tenant ID, API Key, and Domai
|
||||
|
||||
== Changelog ==
|
||||
|
||||
= 1.5.0 =
|
||||
* Allow for multiple servers to serve Evie
|
||||
|
||||
= 1.4.1 - 1.4...=
|
||||
* Bug fixes
|
||||
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Include excluded_classes for Tenant
|
||||
|
||||
Revision ID: 110be45f6e44
|
||||
Revises: 229774547fed
|
||||
Create Date: 2024-08-22 07:37:40.591220
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '110be45f6e44'
|
||||
down_revision = '229774547fed'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
with op.batch_alter_table('roles_users', schema=None) as batch_op:
|
||||
batch_op.drop_constraint('roles_users_user_id_fkey', type_='foreignkey')
|
||||
batch_op.drop_constraint('roles_users_role_id_fkey', type_='foreignkey')
|
||||
batch_op.create_foreign_key(None, 'role', ['role_id'], ['id'], referent_schema='public', ondelete='CASCADE')
|
||||
batch_op.create_foreign_key(None, 'user', ['user_id'], ['id'], referent_schema='public', ondelete='CASCADE')
|
||||
|
||||
with op.batch_alter_table('tenant', schema=None) as batch_op:
|
||||
batch_op.add_column(sa.Column('html_excluded_classes', postgresql.ARRAY(sa.String(length=200)), nullable=True))
|
||||
|
||||
with op.batch_alter_table('tenant_domain', schema=None) as batch_op:
|
||||
batch_op.drop_constraint('tenant_domain_updated_by_fkey', type_='foreignkey')
|
||||
batch_op.drop_constraint('tenant_domain_tenant_id_fkey', type_='foreignkey')
|
||||
batch_op.drop_constraint('tenant_domain_created_by_fkey', type_='foreignkey')
|
||||
batch_op.create_foreign_key(None, 'user', ['updated_by'], ['id'], referent_schema='public')
|
||||
batch_op.create_foreign_key(None, 'tenant', ['tenant_id'], ['id'], referent_schema='public')
|
||||
batch_op.create_foreign_key(None, 'user', ['created_by'], ['id'], referent_schema='public')
|
||||
|
||||
with op.batch_alter_table('user', schema=None) as batch_op:
|
||||
batch_op.drop_constraint('user_tenant_id_fkey', type_='foreignkey')
|
||||
batch_op.create_foreign_key(None, 'tenant', ['tenant_id'], ['id'], referent_schema='public')
|
||||
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade():
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
with op.batch_alter_table('user', schema=None) as batch_op:
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.create_foreign_key('user_tenant_id_fkey', 'tenant', ['tenant_id'], ['id'])
|
||||
|
||||
with op.batch_alter_table('tenant_domain', schema=None) as batch_op:
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.create_foreign_key('tenant_domain_created_by_fkey', 'user', ['created_by'], ['id'])
|
||||
batch_op.create_foreign_key('tenant_domain_tenant_id_fkey', 'tenant', ['tenant_id'], ['id'])
|
||||
batch_op.create_foreign_key('tenant_domain_updated_by_fkey', 'user', ['updated_by'], ['id'])
|
||||
|
||||
with op.batch_alter_table('tenant', schema=None) as batch_op:
|
||||
batch_op.drop_column('html_excluded_classes')
|
||||
|
||||
with op.batch_alter_table('roles_users', schema=None) as batch_op:
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.create_foreign_key('roles_users_role_id_fkey', 'role', ['role_id'], ['id'], ondelete='CASCADE')
|
||||
batch_op.create_foreign_key('roles_users_user_id_fkey', 'user', ['user_id'], ['id'], ondelete='CASCADE')
|
||||
|
||||
# ### end Alembic commands ###
|
||||
@@ -0,0 +1,68 @@
|
||||
"""Add API Key to tenant - next to Chat API key
|
||||
|
||||
Revision ID: ce6f5b62bbfb
|
||||
Revises: 110be45f6e44
|
||||
Create Date: 2024-08-29 07:43:20.662983
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'ce6f5b62bbfb'
|
||||
down_revision = '110be45f6e44'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
with op.batch_alter_table('roles_users', schema=None) as batch_op:
|
||||
batch_op.drop_constraint('roles_users_role_id_fkey', type_='foreignkey')
|
||||
batch_op.drop_constraint('roles_users_user_id_fkey', type_='foreignkey')
|
||||
batch_op.create_foreign_key(None, 'user', ['user_id'], ['id'], referent_schema='public', ondelete='CASCADE')
|
||||
batch_op.create_foreign_key(None, 'role', ['role_id'], ['id'], referent_schema='public', ondelete='CASCADE')
|
||||
|
||||
with op.batch_alter_table('tenant', schema=None) as batch_op:
|
||||
batch_op.add_column(sa.Column('encrypted_api_key', sa.String(length=500), nullable=True))
|
||||
|
||||
with op.batch_alter_table('tenant_domain', schema=None) as batch_op:
|
||||
batch_op.drop_constraint('tenant_domain_tenant_id_fkey', type_='foreignkey')
|
||||
batch_op.drop_constraint('tenant_domain_updated_by_fkey', type_='foreignkey')
|
||||
batch_op.drop_constraint('tenant_domain_created_by_fkey', type_='foreignkey')
|
||||
batch_op.create_foreign_key(None, 'user', ['updated_by'], ['id'], referent_schema='public')
|
||||
batch_op.create_foreign_key(None, 'user', ['created_by'], ['id'], referent_schema='public')
|
||||
batch_op.create_foreign_key(None, 'tenant', ['tenant_id'], ['id'], referent_schema='public')
|
||||
|
||||
with op.batch_alter_table('user', schema=None) as batch_op:
|
||||
batch_op.drop_constraint('user_tenant_id_fkey', type_='foreignkey')
|
||||
batch_op.create_foreign_key(None, 'tenant', ['tenant_id'], ['id'], referent_schema='public')
|
||||
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade():
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
with op.batch_alter_table('user', schema=None) as batch_op:
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.create_foreign_key('user_tenant_id_fkey', 'tenant', ['tenant_id'], ['id'])
|
||||
|
||||
with op.batch_alter_table('tenant_domain', schema=None) as batch_op:
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.create_foreign_key('tenant_domain_created_by_fkey', 'user', ['created_by'], ['id'])
|
||||
batch_op.create_foreign_key('tenant_domain_updated_by_fkey', 'user', ['updated_by'], ['id'])
|
||||
batch_op.create_foreign_key('tenant_domain_tenant_id_fkey', 'tenant', ['tenant_id'], ['id'])
|
||||
|
||||
with op.batch_alter_table('tenant', schema=None) as batch_op:
|
||||
batch_op.drop_column('encrypted_api_key')
|
||||
|
||||
with op.batch_alter_table('roles_users', schema=None) as batch_op:
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.drop_constraint(None, type_='foreignkey')
|
||||
batch_op.create_foreign_key('roles_users_user_id_fkey', 'user', ['user_id'], ['id'], ondelete='CASCADE')
|
||||
batch_op.create_foreign_key('roles_users_role_id_fkey', 'role', ['role_id'], ['id'], ondelete='CASCADE')
|
||||
|
||||
# ### end Alembic commands ###
|
||||
@@ -1,3 +1,4 @@
|
||||
import inspect
|
||||
import logging
|
||||
from logging.config import fileConfig
|
||||
|
||||
@@ -5,12 +6,12 @@ from flask import current_app
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import NullPool, engine_from_config, text
|
||||
|
||||
from common.models.user import Tenant
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.sql import schema
|
||||
import pgvector
|
||||
from pgvector.sqlalchemy import Vector
|
||||
|
||||
from common.models import document, interaction, user
|
||||
|
||||
# this is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
@@ -46,17 +47,34 @@ def get_engine_url():
|
||||
config.set_main_option('sqlalchemy.url', get_engine_url())
|
||||
target_db = current_app.extensions['migrate'].db
|
||||
|
||||
# other values from the config, defined by the needs of env.py,
|
||||
# can be acquired:
|
||||
# my_important_option = config.get_main_option("my_important_option")
|
||||
# ... etc.
|
||||
|
||||
def get_public_table_names():
|
||||
# TODO: This function should include the necessary functionality to automatically retrieve table names
|
||||
return ['role', 'roles_users', 'tenant', 'user', 'tenant_domain']
|
||||
|
||||
|
||||
PUBLIC_TABLES = get_public_table_names()
|
||||
logger.info(f"Public tables: {PUBLIC_TABLES}")
|
||||
|
||||
|
||||
def include_object(object, name, type_, reflected, compare_to):
|
||||
if type_ == "table" and name in PUBLIC_TABLES:
|
||||
logger.info(f"Excluding public table: {name}")
|
||||
return False
|
||||
logger.info(f"Including object: {type_} {name}")
|
||||
return True
|
||||
|
||||
|
||||
# List of Tenants
|
||||
tenants_ = Tenant.query.all()
|
||||
if tenants_:
|
||||
tenants = [tenant.id for tenant in tenants_]
|
||||
else:
|
||||
tenants = []
|
||||
def get_tenant_ids():
|
||||
from common.models.user import Tenant
|
||||
|
||||
tenants_ = Tenant.query.all()
|
||||
if tenants_:
|
||||
tenants = [tenant.id for tenant in tenants_]
|
||||
else:
|
||||
tenants = []
|
||||
return tenants
|
||||
|
||||
|
||||
def get_metadata():
|
||||
@@ -79,7 +97,10 @@ def run_migrations_offline():
|
||||
"""
|
||||
url = config.get_main_option("sqlalchemy.url")
|
||||
context.configure(
|
||||
url=url, target_metadata=get_metadata(), literal_binds=True
|
||||
url=url,
|
||||
target_metadata=get_metadata(),
|
||||
# literal_binds=True,
|
||||
include_object=include_object,
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
@@ -99,18 +120,8 @@ def run_migrations_online():
|
||||
poolclass=NullPool,
|
||||
)
|
||||
|
||||
def process_revision_directives(context, revision, directives):
|
||||
if config.cmd_opts.autogenerate:
|
||||
script = directives[0]
|
||||
if script.upgrade_ops is not None:
|
||||
# Add import for pgvector and set search path
|
||||
script.upgrade_ops.ops.insert(0, sa.schema.ExecuteSQLOp(
|
||||
"SET search_path TO CURRENT_SCHEMA(), public; IMPORT pgvector",
|
||||
execution_options=None
|
||||
))
|
||||
|
||||
with connectable.connect() as connection:
|
||||
print(tenants)
|
||||
tenants = get_tenant_ids()
|
||||
for tenant in tenants:
|
||||
logger.info(f"Migrating tenant: {tenant}")
|
||||
# set search path on the connection, which ensures that
|
||||
@@ -127,7 +138,8 @@ def run_migrations_online():
|
||||
context.configure(
|
||||
connection=connection,
|
||||
target_metadata=get_metadata(),
|
||||
process_revision_directives=process_revision_directives,
|
||||
# literal_binds=True,
|
||||
include_object=include_object,
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
"""Ensure logging information in document domain does not require user
|
||||
|
||||
Revision ID: 43eac8a7a00b
|
||||
Revises: 5d5437d81041
|
||||
Create Date: 2024-09-03 09:36:06.541938
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
import pgvector
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '43eac8a7a00b'
|
||||
down_revision = '5d5437d81041'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# Manual upgrade commands
|
||||
op.execute('ALTER TABLE document ALTER COLUMN created_by DROP NOT NULL')
|
||||
|
||||
|
||||
def downgrade():
|
||||
# Manual downgrade commands
|
||||
op.execute('ALTER TABLE document ALTER COLUMN created_by SET NOT NULL')
|
||||
30
nginx/public/chat_evie.html
Normal file
30
nginx/public/chat_evie.html
Normal file
@@ -0,0 +1,30 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Chat Client Evie</title>
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<script src="https://cdn.socket.io/4.0.1/socket.io.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<script src="/static/js/eveai-sdk.js" defer></script>
|
||||
<script src="/static/js/eveai-chat-widget.js" defer></script>
|
||||
<link rel="stylesheet" href="/static/css/eveai-chat-style.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="chat-container"></div>
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const eveAI = new EveAI(
|
||||
'2',
|
||||
'EveAI-CHAT-9079-8604-7441-6496-7604',
|
||||
'http://macstudio.ask-eve-ai-local.com',
|
||||
'en',
|
||||
'en,fr,nl',
|
||||
'http://macstudio.ask-eve-ai-local.com:8080/'
|
||||
);
|
||||
eveAI.initializeChat('chat-container');
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -421,3 +421,90 @@ input[type="radio"] {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
/* Custom styles for chat session view */
|
||||
.accordion-button:not(.collapsed) {
|
||||
background-color: var(--bs-primary);
|
||||
color: var(--bs-white);
|
||||
}
|
||||
|
||||
.accordion-button:focus {
|
||||
box-shadow: 0 0 0 0.25rem rgba(118, 89, 154, 0.25);
|
||||
}
|
||||
|
||||
.interaction-question {
|
||||
font-size: 1rem; /* Normal text size */
|
||||
}
|
||||
|
||||
.interaction-icons {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.interaction-icons .material-icons {
|
||||
font-size: 24px;
|
||||
margin-left: 8px;
|
||||
}
|
||||
|
||||
.thumb-icon.filled {
|
||||
color: var(--bs-success);
|
||||
}
|
||||
|
||||
.thumb-icon.outlined {
|
||||
color: var(--thumb-icon-outlined);
|
||||
}
|
||||
|
||||
/* Algorithm icon colors */
|
||||
.algorithm-icon.rag_tenant {
|
||||
color: var(--algorithm-color-rag-tenant);
|
||||
}
|
||||
|
||||
.algorithm-icon.rag_wikipedia {
|
||||
color: var(--algorithm-color-rag-wikipedia);
|
||||
}
|
||||
|
||||
.algorithm-icon.rag_google {
|
||||
color: var(--algorithm-color-rag-google);
|
||||
}
|
||||
|
||||
.algorithm-icon.llm {
|
||||
color: var(--algorithm-color-llm);
|
||||
}
|
||||
|
||||
.accordion-body {
|
||||
background-color: var(--bs-light);
|
||||
}
|
||||
|
||||
/* Markdown content styles */
|
||||
.markdown-content {
|
||||
font-size: 1rem;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.markdown-content p {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.markdown-content h1, .markdown-content h2, .markdown-content h3,
|
||||
.markdown-content h4, .markdown-content h5, .markdown-content h6 {
|
||||
margin-top: 1.5rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.markdown-content ul, .markdown-content ol {
|
||||
margin-bottom: 1rem;
|
||||
padding-left: 2rem;
|
||||
}
|
||||
|
||||
.markdown-content code {
|
||||
background-color: #f8f9fa;
|
||||
padding: 0.2em 0.4em;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.markdown-content pre {
|
||||
background-color: #f8f9fa;
|
||||
padding: 1rem;
|
||||
border-radius: 5px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,294 +0,0 @@
|
||||
/* eveai_chat.css */
|
||||
:root {
|
||||
--user-message-bg: #292929; /* Default user message background color */
|
||||
--bot-message-bg: #1e1e1e; /* Default bot message background color */
|
||||
--chat-bg: #1e1e1e; /* Default chat background color */
|
||||
--status-line-color: #e9e9e9; /* Color for the status line text */
|
||||
--status-line-bg: #1e1e1e; /* Background color for the status line */
|
||||
--status-line-height: 30px; /* Fixed height for the status line */
|
||||
|
||||
--algorithm-color-rag-tenant: #0f0; /* Green for RAG_TENANT */
|
||||
--algorithm-color-rag-wikipedia: #00f; /* Blue for RAG_WIKIPEDIA */
|
||||
--algorithm-color-rag-google: #ff0; /* Yellow for RAG_GOOGLE */
|
||||
--algorithm-color-llm: #800080; /* Purple for RAG_LLM */
|
||||
|
||||
/*--font-family: 'Arial, sans-serif'; !* Default font family *!*/
|
||||
--font-family: 'Segoe UI, Roboto, Cantarell, Noto Sans, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol';
|
||||
--font-color: #e9e9e9; /* Default font color */
|
||||
--user-message-font-color: #e9e9e9; /* User message font color */
|
||||
--bot-message-font-color: #e9e9e9; /* Bot message font color */
|
||||
--input-bg: #292929; /* Input background color */
|
||||
--input-border: #ccc; /* Input border color */
|
||||
--input-text-color: #e9e9e9; /* Input text color */
|
||||
--button-color: #007bff; /* Button text color */
|
||||
|
||||
/* Variables for hyperlink backgrounds */
|
||||
--link-bg: #1e1e1e; /* Default background color for hyperlinks */
|
||||
--link-hover-bg: #1e1e1e; /* Background color on hover for hyperlinks */
|
||||
--link-color: #dec981; /* Default text color for hyperlinks */
|
||||
--link-hover-color: #D68F53; /* Text color on hover for hyperlinks */
|
||||
|
||||
/* New scrollbar variables */
|
||||
--scrollbar-bg: #292929; /* Background color for the scrollbar track */
|
||||
--scrollbar-thumb: #4b4b4b; /* Color for the scrollbar thumb */
|
||||
--scrollbar-thumb-hover: #dec981; /* Color for the thumb on hover */
|
||||
--scrollbar-thumb-active: #D68F53; /* Color for the thumb when active (dragged) */
|
||||
|
||||
/* Thumb colors */
|
||||
--thumb-icon-outlined: #4b4b4b;
|
||||
--thumb-icon-filled: #e9e9e9;
|
||||
|
||||
/* Connection Status colors */
|
||||
--status-connected-color: #28a745; /* Green color for connected status */
|
||||
--status-disconnected-color: #ffc107; /* Orange color for disconnected status */
|
||||
}
|
||||
|
||||
/* Connection status styles */
|
||||
.connection-status-icon {
|
||||
vertical-align: middle;
|
||||
font-size: 24px;
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
.status-connected {
|
||||
color: var(--status-connected-color);
|
||||
}
|
||||
|
||||
.status-disconnected {
|
||||
color: var(--status-disconnected-color);
|
||||
}
|
||||
|
||||
/* Custom scrollbar styles */
|
||||
.messages-area::-webkit-scrollbar {
|
||||
width: 12px; /* Width of the scrollbar */
|
||||
}
|
||||
|
||||
.messages-area::-webkit-scrollbar-track {
|
||||
background: var(--scrollbar-bg); /* Background color for the track */
|
||||
}
|
||||
|
||||
.messages-area::-webkit-scrollbar-thumb {
|
||||
background-color: var(--scrollbar-thumb); /* Color of the thumb */
|
||||
border-radius: 10px; /* Rounded corners for the thumb */
|
||||
border: 3px solid var(--scrollbar-bg); /* Space around the thumb */
|
||||
}
|
||||
|
||||
.messages-area::-webkit-scrollbar-thumb:hover {
|
||||
background-color: var(--scrollbar-thumb-hover); /* Color when hovering over the thumb */
|
||||
}
|
||||
|
||||
.messages-area::-webkit-scrollbar-thumb:active {
|
||||
background-color: var(--scrollbar-thumb-active); /* Color when active (dragging) */
|
||||
}
|
||||
|
||||
/* For Firefox */
|
||||
.messages-area {
|
||||
scrollbar-width: thin; /* Make scrollbar thinner */
|
||||
scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-bg); /* Thumb and track colors */
|
||||
}
|
||||
|
||||
/* General Styles */
|
||||
.chat-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 75vh;
|
||||
/*max-height: 100vh;*/
|
||||
max-width: 600px;
|
||||
margin: auto;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
background-color: var(--chat-bg);
|
||||
font-family: var(--font-family); /* Apply the default font family */
|
||||
color: var(--font-color); /* Apply the default font color */
|
||||
}
|
||||
|
||||
.disclaimer {
|
||||
font-size: 0.7em;
|
||||
text-align: right;
|
||||
padding: 5px 20px 5px 5px;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
.messages-area {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 10px;
|
||||
background-color: var(--bot-message-bg);
|
||||
}
|
||||
|
||||
.message {
|
||||
max-width: 90%;
|
||||
margin-bottom: 10px;
|
||||
padding: 10px;
|
||||
border-radius: 15px;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
.message.user {
|
||||
margin-left: auto;
|
||||
background-color: var(--user-message-bg);
|
||||
color: var(--user-message-font-color); /* Apply user message font color */
|
||||
}
|
||||
|
||||
.message.bot {
|
||||
background-color: var(--bot-message-bg);
|
||||
color: var(--bot-message-font-color); /* Apply bot message font color */
|
||||
}
|
||||
|
||||
.message-icons {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
/* Scoped styles for thumb icons */
|
||||
.thumb-icon.outlined {
|
||||
color: var(--thumb-icon-outlined); /* Color for outlined state */
|
||||
}
|
||||
|
||||
.thumb-icon.filled {
|
||||
color: var(--thumb-icon-filled); /* Color for filled state */
|
||||
}
|
||||
|
||||
/* Default styles for material icons */
|
||||
.material-icons {
|
||||
font-size: 24px;
|
||||
vertical-align: middle;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.question-area {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
align-items: center;
|
||||
background-color: var(--user-message-bg);
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
.language-select-container {
|
||||
width: 100%;
|
||||
margin-bottom: 10px; /* Spacing between the dropdown and the textarea */
|
||||
}
|
||||
|
||||
.language-select {
|
||||
width: 100%;
|
||||
margin-bottom: 5px; /* Space between the dropdown and the send button */
|
||||
padding: 8px;
|
||||
border-radius: 5px;
|
||||
border: 1px solid var(--input-border);
|
||||
background-color: var(--input-bg);
|
||||
color: var(--input-text-color);
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
.question-area textarea {
|
||||
flex: 1;
|
||||
border: none;
|
||||
padding: 10px;
|
||||
border-radius: 15px;
|
||||
background-color: var(--input-bg);
|
||||
border: 1px solid var(--input-border);
|
||||
color: var(--input-text-color);
|
||||
font-family: var(--font-family); /* Apply the default font family */
|
||||
font-size: 1rem;
|
||||
resize: vertical;
|
||||
min-height: 60px;
|
||||
max-height: 150px;
|
||||
overflow-y: auto;
|
||||
margin-right: 10px; /* Space between textarea and right-side container */
|
||||
}
|
||||
|
||||
.right-side {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.question-area button {
|
||||
background: none;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
color: var(--button-color);
|
||||
}
|
||||
|
||||
/* Styles for the send icon */
|
||||
.send-icon {
|
||||
font-size: 24px;
|
||||
color: var(--button-color);
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.send-icon.disabled {
|
||||
color: grey; /* Color for the disabled state */
|
||||
cursor: not-allowed; /* Change cursor to indicate disabled state */
|
||||
}
|
||||
|
||||
/* New CSS for the status-line */
|
||||
.status-line {
|
||||
height: var(--status-line-height); /* Fixed height for the status line */
|
||||
padding: 5px 10px;
|
||||
background-color: var(--status-line-bg); /* Background color */
|
||||
color: var(--status-line-color); /* Text color */
|
||||
font-size: 0.9rem; /* Slightly smaller font size */
|
||||
text-align: center; /* Centered text */
|
||||
border-top: 1px solid #ccc; /* Subtle top border */
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: flex-start;
|
||||
}
|
||||
|
||||
/* Algorithm-specific colors for fingerprint icon */
|
||||
.fingerprint-rag-tenant {
|
||||
color: var(--algorithm-color-rag-tenant);
|
||||
}
|
||||
|
||||
.fingerprint-rag-wikipedia {
|
||||
color: var(--algorithm-color-rag-wikipedia);
|
||||
}
|
||||
|
||||
.fingerprint-rag-google {
|
||||
color: var(--algorithm-color-rag-google);
|
||||
}
|
||||
|
||||
.fingerprint-llm {
|
||||
color: var(--algorithm-color-llm);
|
||||
}
|
||||
|
||||
/* Styling for citation links */
|
||||
.citations a {
|
||||
background-color: var(--link-bg); /* Apply default background color */
|
||||
color: var(--link-color); /* Apply default link color */
|
||||
padding: 2px 4px; /* Add padding for better appearance */
|
||||
border-radius: 3px; /* Add slight rounding for a modern look */
|
||||
text-decoration: none; /* Remove default underline */
|
||||
transition: background-color 0.3s, color 0.3s; /* Smooth transition for hover effects */
|
||||
}
|
||||
|
||||
.citations a:hover {
|
||||
background-color: var(--link-hover-bg); /* Background color on hover */
|
||||
color: var(--link-hover-color); /* Text color on hover */
|
||||
}
|
||||
|
||||
/* Media queries for responsiveness */
|
||||
@media (max-width: 768px) {
|
||||
.chat-container {
|
||||
max-width: 90%; /* Reduce max width on smaller screens */
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.chat-container {
|
||||
max-width: 95%; /* Further reduce max width on very small screens */
|
||||
}
|
||||
|
||||
.question-area input {
|
||||
font-size: 0.9rem; /* Adjust input font size for smaller screens */
|
||||
}
|
||||
|
||||
.status-line {
|
||||
font-size: 0.8rem; /* Adjust status line font size for smaller screens */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,470 +0,0 @@
|
||||
/**
 * <eveai-chat-widget> custom element.
 *
 * Renders a small chat UI (message list, question textarea, language selector
 * and status line) and talks to the EveAI backend over Socket.IO. The socket
 * is opened as soon as the element is attached to the document AND all of the
 * `tenant-id`, `api-key`, `domain`, `language` and `languages` attributes are
 * present, in whichever order those two things happen.
 *
 * Relies on two globals being loaded by the host page: `io` (Socket.IO client)
 * and `marked` (Markdown parser).
 */
class EveAIChatWidget extends HTMLElement {
    static get observedAttributes() {
        return ['tenant-id', 'api-key', 'domain', 'language', 'languages'];
    }

    constructor() {
        super();
        this.socket = null;               // Socket.IO connection, created lazily
        this.attributesSet = false;       // True once all required attributes were seen
        this.jwtToken = null;             // Session token handed out by the 'authenticated' event
        this.userTimezone = Intl.DateTimeFormat().resolvedOptions().timeZone;
        this.heartbeatInterval = null;    // Timer id of the heartbeat loop
        this.heartbeatPeriod = 30000;     // Milliseconds between two heartbeats
        this.idleTime = 0;                // Milliseconds since the last user action
        this.maxConnectionIdleTime = 1 * 60 * 60 * 1000; // 1 hour in milliseconds
        this.languages = [];
        this.room = null;
        console.log('EveAIChatWidget constructor called');
    }

    /**
     * Render the template, cache the DOM nodes the widget works with, wire up
     * the UI listeners and start the connection if the attributes are already
     * available (e.g. when they were written directly in the page markup).
     */
    connectedCallback() {
        console.log('connectedCallback called');
        this.innerHTML = this.getTemplate();
        this.messagesArea = this.querySelector('.messages-area');
        this.questionInput = this.querySelector('.question-area textarea');
        this.sendButton = this.querySelector('.send-icon');
        this.languageSelect = this.querySelector('.language-select');
        this.statusLine = this.querySelector('.status-line');
        this.statusMessage = this.querySelector('.status-message');
        this.connectionStatusIcon = this.querySelector('.connection-status-icon');

        this.sendButton.addEventListener('click', () => this.handleSendMessage());
        this.questionInput.addEventListener('keydown', (event) => {
            // Enter sends, Shift+Enter inserts a new line.
            if (event.key === 'Enter' && !event.shiftKey) {
                event.preventDefault();
                this.handleSendMessage();
            }
        });
        // Registered once here (not on every dropdown refresh) so the handler
        // is never attached twice.
        this.languageSelect.addEventListener('change', (e) => {
            this.currentLanguage = e.target.value;
        });

        this.updateAttributes();
        this._startIfReady('connectedCallback');
    }

    /**
     * Release the heartbeat timer and the socket when the element leaves the
     * document, so a removed widget does not keep a live connection around.
     */
    disconnectedCallback() {
        this.stopHeartbeat();
        if (this.socket) {
            this.socket.disconnect();
            this.socket = null;
        }
        this.jwtToken = null;
        this.room = null;
    }

    /** Rebuild the options of the language selector from `this.languages`. */
    populateLanguageDropdown() {
        if (!this.languageSelect) {
            // Template not rendered yet; connectedCallback will call us again.
            return;
        }
        this.languageSelect.innerHTML = '';
        console.log(`languages for options: ${this.languages}`);
        this._appendLanguageOptions(this.languageSelect);
    }

    attributeChangedCallback(name, oldValue, newValue) {
        // The API key is a credential: never echo its value into the console.
        const shownOld = name === 'api-key' ? this._redact(oldValue) : oldValue;
        const shownNew = name === 'api-key' ? this._redact(newValue) : newValue;
        console.log(`attributeChangedCallback called: ${name} changed from ${shownOld} to ${shownNew}`);
        this.updateAttributes();
        this._startIfReady('attributeChangedCallback');
    }

    /** Copy the element attributes onto instance fields. */
    updateAttributes() {
        this.tenantId = this.getAttribute('tenant-id');
        this.apiKey = this.getAttribute('api-key');
        this.domain = this.getAttribute('domain');
        this.language = this.getAttribute('language');
        const languageAttr = this.getAttribute('languages');
        // "en, nl" must yield ['en', 'nl'], not ['en', ' nl'].
        this.languages = languageAttr
            ? languageAttr.split(',').map((lang) => lang.trim()).filter(Boolean)
            : [];
        this.currentLanguage = this.language;
        console.log('Updated attributes:', {
            tenantId: this.tenantId,
            apiKey: this._redact(this.apiKey),
            domain: this.domain,
            language: this.language,
            currentLanguage: this.currentLanguage,
            languages: this.languages
        });
    }

    /**
     * @returns {boolean} true when every attribute needed to connect is present
     *     and non-empty.
     */
    areAllAttributesSet() {
        const tenantId = this.getAttribute('tenant-id');
        const apiKey = this.getAttribute('api-key');
        const domain = this.getAttribute('domain');
        const language = this.getAttribute('language');
        const languages = this.getAttribute('languages');
        console.log('Checking if all attributes are set:', {
            tenantId,
            apiKey: this._redact(apiKey),
            domain,
            language,
            languages
        });
        return Boolean(tenantId && apiKey && domain && language && languages);
    }

    /**
     * Build a detached <select> holding one option per configured language.
     *
     * @returns {HTMLSelectElement} the new selector; changing it updates
     *     `this.currentLanguage`.
     */
    createLanguageDropdown() {
        const select = document.createElement('select');
        select.id = 'languageSelect';
        this._appendLanguageOptions(select);
        select.addEventListener('change', (e) => {
            this.currentLanguage = e.target.value;
        });
        return select;
    }

    /** Open the Socket.IO connection and register all event handlers (once). */
    initializeSocket() {
        if (this.socket) {
            console.log('Socket already initialized');
            return;
        }
        if (typeof io !== 'function') {
            console.error('Socket.IO client library (io) is not loaded');
            this.setStatusMessage('EveAI Chat Widget needs further configuration by site administrator.');
            return;
        }

        console.log(`Initializing socket connection to Evie`);

        // NOTE(review): the API key is sent both as a query parameter and as a
        // bearer token. Query strings tend to end up in access logs; kept
        // because the server side is not visible from here.
        this.socket = io('https://evie.askeveai.com', {
            path: '/chat/socket.io/',
            transports: ['websocket', 'polling'],
            query: {
                tenantId: this.tenantId,
                apiKey: this.apiKey
            },
            auth: {
                token: 'Bearer ' + this.apiKey
            },
            reconnectionAttempts: Infinity, // Infinite reconnection attempts
            reconnectionDelay: 5000,        // Delay between reconnections
            timeout: 20000                  // Connection timeout
        });

        console.log(`Finished initializing socket connection to Evie`);

        this.socket.on('connect', (data) => {
            console.log('Socket connected OK');
            this.setStatusMessage('Connected to EveAI.');
            this.updateConnectionStatus(true);
            this.startHeartbeat();
            // Socket.IO fires 'connect' without a payload, so `data` is
            // normally undefined; the room is confirmed by 'authenticated'.
            if (data && data.room) {
                this.room = data.room;
                console.log(`Joined room: ${this.room}`);
            }
        });

        this.socket.on('authenticated', (data) => {
            console.log('Authenticated event received: ', data);
            this.setStatusMessage('Authenticated.');
            if (data && data.token) {
                this.jwtToken = data.token; // Needed for every later emit
            }
            if (data && data.room) {
                this.room = data.room;
                console.log(`Confirmed room: ${this.room}`);
            }
        });

        this.socket.on('connect_error', (err) => {
            console.error('Socket connection error:', err);
            this.setStatusMessage('EveAI Chat Widget needs further configuration by site administrator.');
            this.updateConnectionStatus(false);
        });

        // NOTE(review): in Socket.IO v3+ 'connect_timeout', 'reconnect_attempt'
        // and 'reconnect' are emitted by the Manager (socket.io), not by the
        // socket; confirm against the client version the page loads.
        this.socket.on('connect_timeout', () => {
            console.error('Socket connection timeout');
            this.setStatusMessage('EveAI Chat Widget needs further configuration by site administrator.');
            this.updateConnectionStatus(false);
        });

        this.socket.on('disconnect', (reason) => {
            console.log('Socket disconnected: ', reason);
            if (reason === 'io server disconnect' && this.socket) {
                // The server closed the connection; the client does not
                // reconnect automatically in that case.
                this.socket.connect();
            }
            this.setStatusMessage('Disconnected from EveAI. Please refresh the page for further interaction.');
            this.updateConnectionStatus(false);
            this.stopHeartbeat();
        });

        this.socket.on('reconnect_attempt', () => {
            console.log('Attempting to reconnect to the server...');
            this.setStatusMessage('Attempting to reconnect...');
        });

        this.socket.on('reconnect', () => {
            console.log('Successfully reconnected to the server');
            this.setStatusMessage('Reconnected to EveAI.');
            this.updateConnectionStatus(true);
            this.startHeartbeat();
        });

        this.socket.on('bot_response', (data) => {
            if (data.tenantId === this.tenantId) {
                console.log('Initial response received:', data);
                console.log('Task ID received:', data.taskId);
                this.checkTaskStatus(data.taskId);
                this.setStatusMessage('Processing...');
            }
        });

        this.socket.on('task_status', (data) => {
            console.log('Task status received:', data.status);
            console.log('Task ID received:', data.taskId);
            console.log('Citations type:', typeof data.citations, 'Citations:', data.citations);

            if (data.status === 'pending') {
                this.updateProgress();
                setTimeout(() => this.checkTaskStatus(data.taskId), 1000); // Poll every second
            } else if (data.status === 'success') {
                this.addBotMessage(data.answer, data.interaction_id, data.algorithm, data.citations);
                this.clearProgress();
            } else {
                this.setStatusMessage('Failed to process message.');
                // Without this the send button stayed disabled after a failure.
                this.toggleSendButton(false);
            }
        });
    }

    /** Show `message` in the status line (no-op before the template exists). */
    setStatusMessage(message) {
        if (this.statusMessage) {
            this.statusMessage.textContent = message;
        }
    }

    /** Switch the connection icon between its connected/disconnected look. */
    updateConnectionStatus(isConnected) {
        const icon = this.connectionStatusIcon;
        if (!icon) {
            return;
        }
        icon.textContent = isConnected ? 'link' : 'link_off';
        icon.classList.toggle('status-connected', Boolean(isConnected));
        icon.classList.toggle('status-disconnected', !isConnected);
    }

    /**
     * (Re)start the heartbeat loop. Each tick emits 'heartbeat' and adds to the
     * idle counter; once the counter reaches `maxConnectionIdleTime` the
     * socket is closed.
     */
    startHeartbeat() {
        this.stopHeartbeat(); // Never run two loops in parallel
        this.heartbeatInterval = setInterval(() => {
            if (!this.socket || !this.socket.connected) {
                return;
            }
            this.socket.emit('heartbeat');
            this.idleTime += this.heartbeatPeriod;
            if (this.idleTime >= this.maxConnectionIdleTime) {
                this.socket.disconnect();
                this.setStatusMessage('Disconnected due to inactivity.');
                this.updateConnectionStatus(false);
                this.stopHeartbeat();
            }
        }, this.heartbeatPeriod);
    }

    stopHeartbeat() {
        if (this.heartbeatInterval) {
            clearInterval(this.heartbeatInterval);
            this.heartbeatInterval = null;
        }
    }

    /** Show 'Processing...' or, when a status is already shown, append a dot. */
    updateProgress() {
        if (!this.statusMessage) {
            return;
        }
        if (!this.statusMessage.textContent) {
            this.statusMessage.textContent = 'Processing...';
        } else {
            this.statusMessage.textContent += '.';
        }
    }

    /** Clear the status line and re-enable the send button. */
    clearProgress() {
        this.setStatusMessage('');
        this.toggleSendButton(false);
    }

    /** Ask the backend for the state of task `taskId`. */
    checkTaskStatus(taskId) {
        if (!this.socket) {
            // The widget was detached while a poll was still scheduled.
            return;
        }
        this.updateProgress();
        this.socket.emit('check_task_status', { task_id: taskId });
    }

    getTemplate() {
        return `
            <div class="chat-container">
                <div class="messages-area"></div>
                <div class="disclaimer">Evie can make mistakes. Please double-check responses.</div>
                <div class="question-area">
                    <textarea placeholder="Type your message here..." rows="3"></textarea>
                    <div class="right-side">
                        <select class="language-select"></select>
                        <i class="material-icons send-icon outlined">send</i>
                    </div>
                </div>
                <div class="status-line">
                    <i class="material-icons connection-status-icon">link_off</i>
                    <span class="status-message"></span>
                </div>
            </div>
        `;
    }

    /** Append the user's own message to the conversation. */
    addUserMessage(text) {
        const message = document.createElement('div');
        message.classList.add('message', 'user');
        // textContent, not innerHTML: whatever the user typed is shown
        // literally and can never be interpreted as markup.
        const paragraph = document.createElement('p');
        paragraph.textContent = text;
        message.appendChild(paragraph);
        this.messagesArea.appendChild(message);
        this.messagesArea.scrollTop = this.messagesArea.scrollHeight;
    }

    /** Send a thumbs up/down for `interactionId` to the backend. */
    handleFeedback(feedback, interactionId) {
        console.log('handleFeedback called');
        if (!this.socket) {
            console.error('Socket is not initialized');
            return;
        }
        if (!this.jwtToken) {
            console.error('JWT token is not available');
            return;
        }
        console.log('Sending message to backend');
        console.log(`Feedback for ${interactionId}: ${feedback}`);
        this.socket.emit('feedback', { tenantId: this.tenantId, token: this.jwtToken, feedback, interactionId });
        this.setStatusMessage('Feedback sent.');
    }

    /**
     * Append a bot answer to the conversation.
     *
     * @param {string} text Markdown answer, rendered through `marked`.
     * @param {*} interactionId Id passed back with the feedback buttons.
     * @param {string} [algorithm] Selects the fingerprint colour class.
     * @param {string[]} [citations] Source URLs listed under the answer;
     *     anything that is not an array is treated as "no citations".
     */
    addBotMessage(text, interactionId, algorithm = 'default', citations = []) {
        const message = document.createElement('div');
        message.classList.add('message', 'bot');

        // NOTE(review): the HTML produced by marked is inserted as-is; if the
        // answer can contain untrusted markup it should be sanitized first.
        const content = marked.parse(text == null ? '' : String(text));

        const citationsHtml = (Array.isArray(citations) ? citations : [])
            .map((url) => {
                const label = this._escapeHtml(url);
                // Only http(s) targets become links; anything else (for
                // example a javascript: URL) is shown as plain text.
                return this._isSafeUrl(url)
                    ? `<a href="${label}" target="_blank" rel="noopener noreferrer">${label}</a>`
                    : label;
            })
            .join('<br>');

        const algorithmClasses = {
            RAG_TENANT: 'fingerprint-rag-tenant',
            RAG_WIKIPEDIA: 'fingerprint-rag-wikipedia',
            RAG_GOOGLE: 'fingerprint-rag-google',
            LLM: 'fingerprint-llm'
        };
        const algorithmClass = Object.prototype.hasOwnProperty.call(algorithmClasses, algorithm)
            ? algorithmClasses[algorithm]
            : '';

        const safeInteractionId = this._escapeHtml(interactionId);
        message.innerHTML = `
            <p>${content}</p>
            ${citationsHtml ? `<p class="citations">${citationsHtml}</p>` : ''}
            <div class="message-icons">
                <i class="material-icons ${algorithmClass}">fingerprint</i>
                <i class="material-icons thumb-icon outlined" data-feedback="up" data-interaction-id="${safeInteractionId}">thumb_up_off_alt</i>
                <i class="material-icons thumb-icon outlined" data-feedback="down" data-interaction-id="${safeInteractionId}">thumb_down_off_alt</i>
            </div>
        `;
        this.messagesArea.appendChild(message);

        // Feedback buttons
        const thumbsUp = message.querySelector('i[data-feedback="up"]');
        const thumbsDown = message.querySelector('i[data-feedback="down"]');
        thumbsUp.addEventListener('click', () => this.toggleFeedback(thumbsUp, thumbsDown, 'up', interactionId));
        thumbsDown.addEventListener('click', () => this.toggleFeedback(thumbsUp, thumbsDown, 'down', interactionId));

        this.messagesArea.scrollTop = this.messagesArea.scrollHeight;
    }

    /** Fill the chosen thumb icon, outline the other one and report the vote. */
    toggleFeedback(thumbsUp, thumbsDown, feedback, interactionId) {
        console.log('feedback called');
        this.idleTime = 0; // User activity resets the idle timer

        const isUp = feedback === 'up';
        const chosen = isUp ? thumbsUp : thumbsDown;
        const other = isUp ? thumbsDown : thumbsUp;

        chosen.textContent = isUp ? 'thumb_up' : 'thumb_down';
        chosen.classList.remove('outlined');
        chosen.classList.add('filled');

        other.textContent = isUp ? 'thumb_down_off_alt' : 'thumb_up_off_alt';
        other.classList.add('outlined');
        other.classList.remove('filled');

        this.handleFeedback(feedback, interactionId);
    }

    /** Read the question field and, if it is not blank, send its content. */
    handleSendMessage() {
        console.log('handleSendMessage called');
        this.idleTime = 0; // User activity resets the idle timer
        const message = this.questionInput.value.trim();
        if (!message) {
            return;
        }
        if (!this.sendMessageToBackend(message)) {
            // Nothing was sent: keep the text in the field and leave the send
            // button usable instead of locking the widget.
            this.setStatusMessage('Not connected to EveAI yet. Please try again in a moment.');
            return;
        }
        this.addUserMessage(message);
        this.questionInput.value = '';
        this.toggleSendButton(true); // Locked until the answer (or a failure) arrives
    }

    /**
     * Emit `message` to the backend.
     *
     * @returns {boolean} true when the message was handed to the socket, false
     *     when the socket or the session token is not available yet.
     */
    sendMessageToBackend(message) {
        console.log('sendMessageToBackend called');
        if (!this.socket) {
            console.error('Socket is not initialized');
            return false;
        }
        if (!this.jwtToken) {
            console.error('JWT token is not available');
            return false;
        }

        const selectedLanguage = this.languageSelect.value;

        console.log('Sending message to backend');
        this.socket.emit('user_message', {
            tenantId: this.tenantId,
            token: this.jwtToken,
            message,
            language: selectedLanguage,
            timezone: this.userTimezone
        });
        this.setStatusMessage('Processing started ...');
        return true;
    }

    /**
     * Lock (isProcessing = true) or unlock the send button. The icon glyph is
     * 'send' in both states; only the CSS classes and clickability change.
     */
    toggleSendButton(isProcessing) {
        const button = this.sendButton;
        if (!button) {
            return;
        }
        const busy = Boolean(isProcessing);
        button.textContent = 'send';
        button.classList.toggle('outlined', !busy);
        button.classList.toggle('filled', busy);
        button.classList.toggle('disabled', busy);
        button.style.pointerEvents = busy ? 'none' : 'auto';
    }

    /**
     * Populate the dropdown and open the socket once the template is rendered
     * and every required attribute is present; does nothing otherwise or when
     * a socket already exists.
     */
    _startIfReady(origin) {
        if (!this.messagesArea || this.socket || !this.areAllAttributesSet()) {
            return;
        }
        console.log(`All attributes set in ${origin}, initializing socket`);
        this.attributesSet = true;
        this.populateLanguageDropdown();
        this.initializeSocket();
    }

    /** Append one <option> per configured language to `select`. */
    _appendLanguageOptions(select) {
        this.languages.forEach((lang) => {
            const option = document.createElement('option');
            option.value = lang;
            option.textContent = lang.toUpperCase();
            option.selected = lang === this.currentLanguage;
            select.appendChild(option);
        });
    }

    /** Escape a value for use in HTML text or a double-quoted attribute. */
    _escapeHtml(value) {
        return String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /** True when `url` resolves (relative to the page) to an http(s) URL. */
    _isSafeUrl(url) {
        try {
            const protocol = new URL(String(url), document.baseURI).protocol;
            return protocol === 'http:' || protocol === 'https:';
        } catch (err) {
            return false;
        }
    }

    /** Placeholder used instead of a secret in log output. */
    _redact(secret) {
        return secret ? '[redacted]' : secret;
    }
}

// Loading the script twice must not throw on the second registration.
if (!customElements.get('eveai-chat-widget')) {
    customElements.define('eveai-chat-widget', EveAIChatWidget);
}
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
// static/js/eveai-sdk.js
|
||||
/**
 * Minimal SDK entry point: stores the tenant configuration and mounts an
 * <eveai-chat-widget> element, configured with it, into a container.
 */
class EveAI {
    /**
     * @param {string} tenantId Tenant identifier passed to the widget.
     * @param {string} apiKey API key passed to the widget (never logged).
     * @param {string} domain Value for the widget's `domain` attribute.
     * @param {string} language Initially selected language.
     * @param {string|string[]} languages Selectable languages, either as a
     *     comma-separated string or as an array.
     */
    constructor(tenantId, apiKey, domain, language, languages) {
        this.tenantId = tenantId;
        this.apiKey = apiKey;
        this.domain = domain;
        this.language = language;
        this.languages = languages;

        // The API key is a credential: log only whether one was supplied.
        console.log('EveAI constructor:', {
            tenantId,
            apiKey: apiKey ? '[redacted]' : apiKey,
            domain,
            language,
            languages
        });
    }

    /**
     * Replace the content of the element with id `containerId` by a chat
     * widget and hand it the stored configuration once the custom element is
     * defined. Logs an error and does nothing when the container is missing.
     */
    initializeChat(containerId) {
        const container = document.getElementById(containerId);
        if (!container) {
            console.error('Container not found');
            return;
        }

        container.innerHTML = '<eveai-chat-widget></eveai-chat-widget>';
        customElements.whenDefined('eveai-chat-widget').then(() => {
            const chatWidget = container.querySelector('eveai-chat-widget');
            if (!chatWidget) {
                // The container was re-rendered before the element got defined.
                console.error('Chat widget element not found in container');
                return;
            }
            // Attributes are strings; make the array-to-list conversion explicit.
            const languages = Array.isArray(this.languages)
                ? this.languages.join(',')
                : this.languages;

            chatWidget.setAttribute('tenant-id', this.tenantId);
            chatWidget.setAttribute('api-key', this.apiKey);
            chatWidget.setAttribute('domain', this.domain);
            chatWidget.setAttribute('language', this.language);
            chatWidget.setAttribute('languages', languages);
        });
    }
}
|
||||
@@ -60,7 +60,6 @@ urllib3~=2.2.2
|
||||
WTForms~=3.1.2
|
||||
wtforms-html5~=0.6.1
|
||||
zxcvbn~=4.4.28
|
||||
pytube~=15.0.0
|
||||
groq~=0.9.0
|
||||
pydub~=0.25.1
|
||||
argparse~=1.4.0
|
||||
@@ -73,4 +72,5 @@ graypy~=2.1.0
|
||||
lxml~=5.3.0
|
||||
pillow~=10.4.0
|
||||
pdfplumber~=0.11.4
|
||||
PyPDF2~=3.0.1
|
||||
PyPDF2~=3.0.1
|
||||
Flask-RESTful~=0.3.10
|
||||
Reference in New Issue
Block a user