11 Commits

46 changed files with 5810 additions and 113 deletions

.gitignore
View File

@@ -45,3 +45,4 @@ scripts/__pycache__/run_eveai_app.cpython-312.pyc
*repo.txt
/docker/eveai_logs/
/integrations/Wordpress/eveai_sync.zip
/integrations/Wordpress/eveai-chat.zip

View File

@@ -25,13 +25,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Security
- In case of vulnerabilities.
## [2.0.0-alfa]
## [2.0.1-alfa]
### Added
- Introduction of dynamic Retrievers & Specialists
- Introduction of dynamic Processors
- Introduction of caching system
- Introduction of a better template manager
- Zapier Integration (partial - only adding files).
- Addition of general chunking parameters (chunking_heading_level and chunking_patterns)
- Addition of DOCX and Markdown Processor Types
### Changed
- For changes in existing functionality.
@@ -43,11 +42,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- For now removed features.
### Fixed
- Set default language when registering Documents or URLs.
- Ensure the RAG Specialist is using the detailed_question
- WordPress Chat Plugin: languages dropdown filled again
- OpenAI update - proxies no longer supported
- Build & Release script for WordPress Plugins (including end user download folder)
### Security
- In case of vulnerabilities.
## [2.0.0-alfa]
### Added
- Introduction of dynamic Retrievers & Specialists
- Introduction of dynamic Processors
- Introduction of caching system
- Introduction of a better template manager
- Modernisation of external API/Socket authentication using projects
- Creation of new eveai_chat WordPress plugin to support specialists
### Changed
- Update of eveai_sync WordPress plugin
### Fixed
- Set default language when registering Documents or URLs.
### Security
- Security improvements to Docker images
## [1.0.14-alfa]
### Added

View File

@@ -64,6 +64,20 @@ class TaggingFields(BaseModel):
}
class ChunkingPatternsField(BaseModel):
"""Represents a set of chunking patterns"""
patterns: List[str]
@field_validator('patterns')
def validate_patterns(cls, patterns):
for pattern in patterns:
try:
re.compile(pattern)
except re.error as e:
raise ValueError(f"Invalid regex pattern '{pattern}': {str(e)}")
return patterns
class ArgumentConstraint(BaseModel):
"""Base class for all argument constraints"""
description: Optional[str] = None
@@ -610,4 +624,39 @@ def _generate_yaml_docs(fields: Dict[str, Any], version: str) -> str:
}
}
return yaml.dump(doc, sort_keys=False, default_flow_style=False)
def patterns_to_json(text_area_content: str) -> str:
"""Convert line-based patterns to JSON"""
text_area_content = text_area_content.strip()
if len(text_area_content) == 0:
return json.dumps([])
# Split on newlines and remove empty lines
patterns = [line.strip() for line in text_area_content.split('\n') if line.strip()]
return json.dumps(patterns)
def json_to_patterns(json_content: str) -> str:
"""Convert JSON patterns list to text area content"""
try:
patterns = json.loads(json_content)
if not isinstance(patterns, list):
raise ValueError("JSON must contain a list of patterns")
# Join with newlines
return '\n'.join(patterns)
except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON format: {e}")
def json_to_pattern_list(json_content: str) -> list:
"""Convert JSON patterns list to text area content"""
try:
patterns = json.loads(json_content)
if not isinstance(patterns, list):
raise ValueError("JSON must contain a list of patterns")
# Unescape if needed
patterns = [pattern.replace('\\\\', '\\') for pattern in patterns]
return patterns
except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON format: {e}")

View File

@@ -1,6 +1,7 @@
from flask import request, session
import time
from flask_security import current_user
import json
def log_request_middleware(app):
@@ -59,3 +60,97 @@ def log_request_middleware(app):
@app.after_request
def log_session_state_after(response):
return response
def register_request_debugger(app):
@app.before_request
def debug_request_info():
"""Log consolidated request information for debugging"""
# Skip health check endpoints
if request.path.startswith('/_healthz') or request.path.startswith('/healthz'):
return
# Gather all request information in a structured way
debug_info = {
"basic_info": {
"method": request.method,
"path": request.path,
"content_type": request.content_type,
"content_length": request.content_length
},
"environment": {
"remote_addr": request.remote_addr,
"user_agent": str(request.user_agent)
}
}
# Add headers (excluding sensitive ones)
safe_headers = {k: v for k, v in request.headers.items()
if k.lower() not in ('authorization', 'cookie', 'x-api-key')}
debug_info["headers"] = safe_headers
# Add authentication info (presence only)
auth_header = request.headers.get('Authorization', '')
debug_info["auth_info"] = {
"has_auth_header": bool(auth_header),
"auth_type": auth_header.split(' ')[0] if auth_header else None,
"token_length": len(auth_header.split(' ')[1]) if auth_header and len(auth_header.split(' ')) > 1 else 0,
"header_format": 'Valid format' if auth_header.startswith('Bearer ') else 'Invalid format',
"raw_header": auth_header[:10] + '...' if auth_header else None # Show first 10 chars only
}
# Add request data based on type
if request.is_json:
try:
json_data = request.get_json()
if isinstance(json_data, dict):
# Remove sensitive fields from logging
safe_json = {k: v for k, v in json_data.items()
if not any(sensitive in k.lower()
for sensitive in ['password', 'token', 'secret', 'key'])}
debug_info["request_data"] = {
"type": "json",
"content": safe_json
}
except Exception as e:
debug_info["request_data"] = {
"type": "json",
"error": str(e)
}
elif request.form:
safe_form = {k: v for k, v in request.form.items()
if not any(sensitive in k.lower()
for sensitive in ['password', 'token', 'secret', 'key'])}
debug_info["request_data"] = {
"type": "form",
"content": safe_form
}
# Add file information if present
if request.files:
debug_info["files"] = {
name: {
"filename": f.filename,
"content_type": f.content_type,
"content_length": f.content_length if hasattr(f, 'content_length') else None
}
for name, f in request.files.items()
}
# Add CORS information if present
cors_headers = {
"origin": request.headers.get('Origin'),
"request_method": request.headers.get('Access-Control-Request-Method'),
"request_headers": request.headers.get('Access-Control-Request-Headers')
}
if any(cors_headers.values()):
debug_info["cors"] = {k: v for k, v in cors_headers.items() if v is not None}
# Format the debug info as a pretty-printed JSON string with indentation
formatted_debug_info = json.dumps(debug_info, indent=2, sort_keys=True)
# Log everything in a single statement
app.logger.debug(
"Request Debug Information",
extra={"request_debug": formatted_debug_info}
)
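A minimal wiring sketch, assuming only the functions above are in scope; the app name and log level are illustrative, though eveai_api's create_app later in this changeset registers the debugger the same way:

import logging
from flask import Flask

def create_debug_app():
    app = Flask("example_app")          # illustrative name, not from this changeset
    app.logger.setLevel(logging.DEBUG)  # debug_request_info logs at DEBUG level
    log_request_middleware(app)
    register_request_debugger(app)
    return app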

View File

@@ -12,7 +12,7 @@ import requests
from urllib.parse import urlparse, unquote, urlunparse
import os
from .eveai_exceptions import (EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
EveAIInvalidCatalog, EveAIInvalidDocument, EveAIInvalidDocumentVersion)
EveAIInvalidCatalog, EveAIInvalidDocument, EveAIInvalidDocumentVersion, EveAIException)
from ..models.user import Tenant
@@ -219,12 +219,6 @@ def start_embedding_task(tenant_id, doc_vers_id):
return task.id
def validate_file_type(extension):
if extension not in current_app.config['SUPPORTED_FILE_TYPES']:
raise EveAIUnsupportedFileType(f"Filetype {extension} is currently not supported. "
f"Supported filetypes: {', '.join(current_app.config['SUPPORTED_FILE_TYPES'])}")
def get_filename_from_url(url):
parsed_url = urlparse(url)
path_parts = parsed_url.path.split('/')
@@ -363,3 +357,109 @@ def cope_with_local_url(url):
return url
def lookup_document(tenant_id: int, lookup_criteria: dict, metadata_type: str) -> tuple[Document, DocumentVersion]:
"""
Look up a document using metadata criteria
Args:
tenant_id: ID of the tenant
lookup_criteria: Dictionary of key-value pairs to match in metadata
metadata_type: Which metadata to search in ('user_metadata' or 'system_metadata')
Returns:
Tuple of (Document, DocumentVersion) if found
Raises:
ValueError: If invalid metadata_type provided
EveAIException: If lookup fails
"""
if metadata_type not in ['user_metadata', 'system_metadata']:
raise ValueError(f"Invalid metadata_type: {metadata_type}")
try:
# Query for the latest document version matching the criteria
query = (db.session.query(Document, DocumentVersion)
.join(DocumentVersion)
.filter(Document.id == DocumentVersion.doc_id)
.order_by(DocumentVersion.id.desc()))
# Add metadata filtering using PostgreSQL JSONB operators
metadata_field = getattr(DocumentVersion, metadata_type)
for key, value in lookup_criteria.items():
query = query.filter(metadata_field[key].astext == str(value))
# Get first result
result = query.first()
if not result:
raise EveAIException(
f"No document found matching criteria in {metadata_type}",
status_code=404
)
return result
except SQLAlchemyError as e:
current_app.logger.error(f'Database error during document lookup for tenant {tenant_id}: {e}')
raise EveAIException(
"Database error during document lookup",
status_code=500
)
except Exception as e:
current_app.logger.error(f'Error during document lookup for tenant {tenant_id}: {e}')
raise EveAIException(
"Error during document lookup",
status_code=500
)
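For illustration, a hedged sketch of a call site inside a Flask application context; the tenant ID and criteria values are invented examples consistent with the docstring:

# Invented example: find the latest version of a document that Zapier
# registered with an external ID, searching user_metadata.
doc, doc_vers = lookup_document(
    tenant_id=42,
    lookup_criteria={"source": "zapier", "external_id": "123"},
    metadata_type="user_metadata",
)
print(doc.id, doc.name, doc_vers.id)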
def refresh_document_with_content(doc_id: int, tenant_id: int, file_content: bytes, api_input: dict) -> tuple:
"""
Refresh document with new content
Args:
doc_id: Document ID
tenant_id: Tenant ID
file_content: New file content
api_input: Additional document information
Returns:
Tuple of (new_version, task_id)
"""
doc = Document.query.get(doc_id)
if not doc:
raise EveAIInvalidDocument(tenant_id, doc_id)
old_doc_vers = DocumentVersion.query.filter_by(doc_id=doc_id).order_by(desc(DocumentVersion.id)).first()
# Create new version with same file type as original
extension = old_doc_vers.file_type
new_doc_vers = create_version_for_document(
doc, tenant_id,
'', # No URL for content-based updates
old_doc_vers.sub_file_type,
api_input.get('language', old_doc_vers.language),
api_input.get('user_context', old_doc_vers.user_context),
api_input.get('user_metadata', old_doc_vers.user_metadata),
api_input.get('catalog_properties', old_doc_vers.catalog_properties),
)
try:
db.session.add(new_doc_vers)
db.session.commit()
except SQLAlchemyError as e:
db.session.rollback()
return None, str(e)
# Upload new content
upload_file_for_version(new_doc_vers, file_content, extension, tenant_id)
# Start embedding task
task = current_celery.send_task('create_embeddings', args=[tenant_id, new_doc_vers.id], queue='embeddings')
current_app.logger.info(f'Embedding creation started for document {doc_id} on version {new_doc_vers.id} '
f'with task id: {task.id}.')
return new_doc_vers, task.id

View File

@@ -55,7 +55,6 @@ class Config(object):
# file upload settings
MAX_CONTENT_LENGTH = 50 * 1024 * 1024
UPLOAD_EXTENSIONS = ['.txt', '.pdf', '.png', '.jpg', '.jpeg', '.gif']
# supported languages
SUPPORTED_LANGUAGES = ['en', 'fr', 'nl', 'de', 'es']
@@ -143,10 +142,7 @@ class Config(object):
LANGCHAIN_ENDPOINT = 'https://api.smith.langchain.com'
LANGCHAIN_PROJECT = "eveai"
SUPPORTED_FILE_TYPES = ['pdf', 'html', 'md', 'txt', 'mp3', 'mp4', 'ogg', 'srt']
TENANT_TYPES = ['Active', 'Demo', 'Inactive', 'Test', 'Wordpress Starter']
TENANT_TYPES = ['Active', 'Demo', 'Inactive', 'Test']
# The maximum number of seconds allowed for audio compression (to save resources)
MAX_COMPRESSION_DURATION = 60*10 # 10 minutes

View File

@@ -5,6 +5,19 @@ PROCESSOR_TYPES = {
"file_types": "html",
"Description": "A processor for HTML files",
"configuration": {
"chunking_patterns": {
"name": "Chunking Patterns",
"description": "A list of Patterns used to chunk files into logical pieces",
"type": "chunking_patterns",
"required": False
},
"chunking_heading_level": {
"name": "Chunking Heading Level",
"type": "integer",
"description": "Maximum heading level to consider for chunking (1-6)",
"required": False,
"default": 2
},
"html_tags": {
"name": "HTML Tags",
"type": "string",
@@ -45,7 +58,21 @@ PROCESSOR_TYPES = {
"name": "PDF Processor",
"file_types": "pdf",
"Description": "A Processor for PDF files",
"configuration": {}
"configuration": {
"chunking_patterns": {
"name": "Chunking Patterns",
"description": "A list of Patterns used to chunk files into logical pieces",
"type": "chunking_patterns",
"required": False
},
"chunking_heading_level": {
"name": "Chunking Heading Level",
"type": "integer",
"description": "Maximum heading level to consider for chunking (1-6)",
"required": False,
"default": 2
},
},
},
"AUDIO_PROCESSOR": {
"name": "AUDIO Processor",
@@ -53,4 +80,89 @@ PROCESSOR_TYPES = {
"Description": "A Processor for audio files",
"configuration": {}
},
"MARKDOWN_PROCESSOR": {
"name": "Markdown Processor",
"file_types": "md",
"Description": "A Processor for markdown files",
"configuration": {
"chunking_patterns": {
"name": "Chunking Patterns",
"description": "A list of Patterns used to chunk files into logical pieces",
"type": "chunking_patterns",
"required": False
},
"chunking_heading_level": {
"name": "Chunking Heading Level",
"type": "integer",
"description": "Maximum heading level to consider for chunking (1-6)",
"required": False,
"default": 2
},
}
},
"DOCX_PROCESSOR": {
"name": "DOCX Processor",
"file_types": "docx",
"Description": "A processor for DOCX files",
"configuration": {
"chunking_patterns": {
"name": "Chunking Patterns",
"description": "A list of Patterns used to chunk files into logical pieces",
"type": "chunking_patterns",
"required": False
},
"chunking_heading_level": {
"name": "Chunking Heading Level",
"type": "integer",
"description": "Maximum heading level to consider for chunking (1-6)",
"required": False,
"default": 2
},
"extract_comments": {
"name": "Extract Comments",
"type": "boolean",
"description": "Whether to include document comments in the markdown",
"required": False,
"default": False
},
"extract_headers_footers": {
"name": "Extract Headers/Footers",
"type": "boolean",
"description": "Whether to include headers and footers in the markdown",
"required": False,
"default": False
},
"preserve_formatting": {
"name": "Preserve Formatting",
"type": "boolean",
"description": "Whether to preserve bold, italic, and other text formatting",
"required": False,
"default": True
},
"list_style": {
"name": "List Style",
"type": "enum",
"description": "How to format lists in markdown",
"required": False,
"default": "dash",
"allowed_values": ["dash", "asterisk", "plus"]
},
"image_handling": {
"name": "Image Handling",
"type": "enum",
"description": "How to handle embedded images",
"required": False,
"default": "skip",
"allowed_values": ["skip", "extract", "placeholder"]
},
"table_alignment": {
"name": "Table Alignment",
"type": "enum",
"description": "How to align table contents",
"required": False,
"default": "left",
"allowed_values": ["left", "center", "preserve"]
}
}
}
}
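To make the new chunking keys concrete, a hedged example of what a stored Processor.configuration for the DOCX processor might look like (all values invented); note that chunking_patterns holds a JSON-encoded list, matching patterns_to_json in config_field_types:

# Invented example of a stored DOCX processor configuration using the new keys.
example_docx_configuration = {
    "chunking_patterns": '["^Chapter \\\\d+", "^Appendix [A-Z]"]',  # JSON-encoded list
    "chunking_heading_level": 3,   # split on #, ##, ###
    "extract_comments": False,
    "preserve_formatting": True,
    "list_style": "dash",
    "image_handling": "skip",
    "table_alignment": "left",
}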

View File

@@ -24,6 +24,8 @@ x-common-variables: &common-variables
MAIL_PORT: 465
REDIS_URL: redis
REDIS_PORT: '6379'
FLOWER_USER: 'Felucia'
FLOWER_PASSWORD: 'Jungles'
OPENAI_API_KEY: '<redacted>'
GROQ_API_KEY: '<redacted>'
ANTHROPIC_API_KEY: '<redacted>'

View File

@@ -18,6 +18,7 @@ from .api.auth import auth_ns
from config.config import get_config
from common.utils.celery_utils import make_celery, init_celery
from common.utils.eveai_exceptions import EveAIException
from common.utils.debug_utils import register_request_debugger
def create_app(config_file=None):
@@ -55,6 +56,9 @@ def create_app(config_file=None):
# Register Error Handlers
register_error_handlers(app)
# Register Request Debugger
register_request_debugger(app)
@app.before_request
def check_cors():
if request.method == 'OPTIONS':

View File

@@ -1,10 +1,11 @@
from datetime import timedelta, datetime as dt, timezone as tz
from flask_restx import Namespace, Resource, fields
from flask_jwt_extended import create_access_token, verify_jwt_in_request, get_jwt
from flask_jwt_extended import create_access_token, verify_jwt_in_request, get_jwt, get_jwt_identity, jwt_required
from common.models.user import Tenant, TenantProject
from common.extensions import simple_encryption
from flask import current_app, request
from flask import current_app, jsonify, request
from functools import wraps
auth_ns = Namespace('auth', description='Authentication related operations')
@@ -36,7 +37,6 @@ class Token(Resource):
"""
Get JWT token
"""
current_app.logger.debug(f'Token Requested {auth_ns.payload}')
try:
tenant_id = int(auth_ns.payload['tenant_id'])
api_key = auth_ns.payload['api_key']
@@ -71,16 +71,19 @@ class Token(Resource):
current_app.logger.error(f"Project for given API key not found for Tenant: {tenant_id}")
return {'message': "Invalid API key"}, 401
if "DOCAPI" not in matching_project.services:
current_app.logger.error(f"Service DOCAPI not authorized for Project {matching_project.name} "
f"for Tenant: {tenant_id}")
return {'message': f"Service DOCAPI not authorized for Project {matching_project.name}"}, 403
# Get the JWT_ACCESS_TOKEN_EXPIRES setting from the app config
expires_delta = current_app.config.get('JWT_ACCESS_TOKEN_EXPIRES', timedelta(minutes=15))
try:
access_token = create_access_token(identity=tenant_id, expires_delta=expires_delta)
additional_claims = {
'services': matching_project.services,
}
access_token = create_access_token(
identity=tenant_id,
expires_delta=expires_delta,
additional_claims=additional_claims
)
current_app.logger.debug(f"Created token: {access_token}")
return {
'access_token': access_token,
'expires_in': expires_delta.total_seconds()
@@ -145,4 +148,51 @@ class TokenRefresh(Resource):
}, 200
except Exception as e:
current_app.logger.error(f"Token refresh failed: {str(e)}")
return {'message': 'Token refresh failed'}, 401
@auth_ns.route('/services')
class Services(Resource):
@jwt_required()
@auth_ns.doc(security='Bearer')
@auth_ns.response(200, 'Success', {
'services': fields.List(fields.String, description='List of allowed services for this token'),
'tenant_id': fields.Integer(description='Tenant ID associated with this token')
})
@auth_ns.response(401, 'Invalid or expired token')
def get(self):
"""
Get allowed services for the current token
"""
# Log the incoming authorization header
auth_header = request.headers.get('Authorization')
current_app.logger.debug(f"Received Authorization header: {auth_header}")
claims = get_jwt()
tenant_id = get_jwt_identity()
return {
'services': claims.get('services', []),
'tenant_id': tenant_id
}, 200
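An illustrative client flow against these endpoints; the base URL and the /token route are assumptions, not taken from this changeset, and the credentials are placeholders:

import requests

BASE = "https://api.example.com/auth"  # assumed prefix for this namespace

# Obtain a token; the payload fields match the Token resource above.
resp = requests.post(f"{BASE}/token", json={"tenant_id": 42, "api_key": "<api-key>"})
token = resp.json()["access_token"]

# Ask which services the token carries.
resp = requests.get(f"{BASE}/services", headers={"Authorization": f"Bearer {token}"})
print(resp.json())  # e.g. {"services": ["DOCAPI"], "tenant_id": 42}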
# Decorator to check for a particular service
def requires_service(service_name):
def decorator(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
# Get the JWT claims
claims = get_jwt()
services = claims.get('services', [])
if service_name not in services:
return {
'message': f'This endpoint requires the {service_name} service',
'error': 'Insufficient permissions'
}, 403
return fn(*args, **kwargs)
return wrapper
return decorator
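requires_service must sit below @jwt_required() so that get_jwt() inside the wrapper sees verified claims, which is exactly how the document endpoints later in this changeset apply it. A minimal sketch in the same module, with an invented resource:

# Invented resource showing the decorator order; jwt_required must run first.
@auth_ns.route('/docapi_only')
class DocApiOnly(Resource):
    @jwt_required()
    @requires_service('DOCAPI')
    def get(self):
        return {'message': 'token carries the DOCAPI service'}, 200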

View File

@@ -1,7 +1,9 @@
import io
import json
from datetime import datetime
import pytz
import requests
from flask import current_app, request
from flask_restx import Namespace, Resource, fields, reqparse
from flask_jwt_extended import jwt_required, get_jwt_identity
@@ -9,11 +11,12 @@ from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename
from common.utils.document_utils import (
create_document_stack, process_url, start_embedding_task,
validate_file_type, EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
EveAIInvalidLanguageException, EveAIDoubleURLException, EveAIUnsupportedFileType,
get_documents_list, edit_document, refresh_document, edit_document_version,
refresh_document_with_info
refresh_document_with_info, lookup_document, refresh_document_with_content
)
from common.utils.eveai_exceptions import EveAIException
from eveai_api.api.auth import requires_service
def validate_date(date_str):
@@ -59,25 +62,31 @@ add_document_response = document_ns.model('AddDocumentResponse', {
@document_ns.route('/add_document')
class AddDocument(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.expect(upload_parser)
@document_ns.response(201, 'Document added successfully', add_document_response)
@document_ns.response(400, 'Validation Error')
@document_ns.response(500, 'Internal Server Error')
def post(self):
"""
Add a new document
Add a new document by providing the content of a file (Multipart/form-data).
"""
tenant_id = get_jwt_identity()
current_app.logger.info(f'Adding document for tenant {tenant_id}')
try:
args = upload_parser.parse_args()
except Exception as e:
current_app.logger.error(f"Error parsing arguments: {str(e)}")
current_app.logger.error(f"Exception type: {type(e)}")
raise
try:
file = args['file']
filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower()
validate_file_type(extension)
# validate_file_type(extension)
api_input = {
'catalog_id': args.get('catalog_id'),
@@ -107,6 +116,113 @@ class AddDocument(Resource):
document_ns.abort(500, 'Error adding document')
# Models for AddDocumentThroughURL
add_document_through_url = document_ns.model('AddDocumentThroughURL', {
'catalog_id': fields.Integer(required=True, description='ID of the catalog the URL needs to be added to'),
'temp_url': fields.String(required=True, description='Temporary URL of the document to add'),
'name': fields.String(required=True, description='Name of the document'),
'language': fields.String(required=True, description='Language of the document'),
'user_context': fields.String(required=False, description='User context for the document'),
'valid_from': fields.String(required=False, description='Valid from date for the document'),
'user_metadata': fields.String(required=False, description='User metadata for the document'),
'system_metadata': fields.String(required=False, description='System metadata for the document'),
'catalog_properties': fields.String(required=False, description='The catalog configuration to be passed along (JSON '
'format). Validity against catalog requirements '
'is not checked and is the responsibility of the '
'calling client.'),
})
add_document_through_url_response = document_ns.model('AddDocumentThroughURLResponse', {
'message': fields.String(description='Status message'),
'document_id': fields.Integer(description='ID of the created document'),
'document_version_id': fields.Integer(description='ID of the created document version'),
'task_id': fields.String(description='ID of the embedding task')
})
@document_ns.route('/add_document_through_url')
class AddDocumentThroughURL(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.expect(add_document_through_url)
@document_ns.response(201, 'Document added successfully', add_document_through_url_response)
@document_ns.response(400, 'Validation Error')
@document_ns.response(422, 'File could not be processed')
@document_ns.response(500, 'Internal Server Error')
def post(self):
"""
Add a new document using a URL. The URL may be temporary and will not be stored.
Mainly used for temporary URLs such as those passed along by Zapier.
"""
tenant_id = get_jwt_identity()
current_app.logger.info(f'Adding document through url for tenant {tenant_id}')
try:
args = document_ns.payload
except Exception as e:
current_app.logger.error(f"Error parsing arguments: {str(e)}")
current_app.logger.error(f"Exception type: {type(e)}")
raise
try:
# Step 1: Download from stashed URL
stashed_url = args['temp_url']
current_app.logger.info(f"Downloading stashed file from URL: {stashed_url}")
response = requests.get(stashed_url, stream=True)
response.raise_for_status()
hydration_url = response.text.strip()
current_app.logger.info(f"Downloading actual file from URL: {hydration_url}")
# Step 2: Download from hydration URL
actual_file_response = requests.get(hydration_url, stream=True)
actual_file_response.raise_for_status()
hydrated_file_content = actual_file_response.content
# Get filename from URL or use provided name
filename = secure_filename(args.get('name'))
extension = filename.rsplit('.', 1)[1].lower() if '.' in filename else ''
# Create FileStorage object from downloaded content
file_content = io.BytesIO(hydrated_file_content)
file = FileStorage(
stream=file_content,
filename=filename,
content_type=response.headers.get('content-type', 'application/octet-stream')
)
current_app.logger.info(f"Successfully downloaded file: {filename}")
except requests.RequestException as e:
current_app.logger.error(f"Error downloading file: {str(e)}")
return {'message': f'Error downloading file: {str(e)}'}, 422
try:
# Prepare API input
api_input = {
'catalog_id': args.get('catalog_id'),
'name': args.get('name') or filename,
'language': args.get('language'),
'user_context': args.get('user_context'),
'valid_from': args.get('valid_from'),
'user_metadata': args.get('user_metadata'),
'catalog_properties': args.get('catalog_properties'),
}
new_doc, new_doc_vers = create_document_stack(api_input, file, filename, extension, tenant_id)
task_id = start_embedding_task(tenant_id, new_doc_vers.id)
return {
'message': f'Processing on document {new_doc.name}, version {new_doc_vers.id} started. Task ID: {task_id}.',
'document_id': new_doc.id,
'document_version_id': new_doc_vers.id,
'task_id': task_id
}, 201
except (EveAIInvalidLanguageException, EveAIUnsupportedFileType) as e:
current_app.logger.error(f'Error adding document: {str(e)}')
return {'message': str(e)}, 400
except Exception as e:
current_app.logger.error(f'Error adding document: {str(e)}')
return {'message': 'Error adding document'}, 500
# Models for AddURL
add_url_model = document_ns.model('AddURL', {
'catalog_id': fields.Integer(required=True, description='ID of the catalog the URL needs to be added to'),
@@ -134,13 +250,15 @@ add_url_response = document_ns.model('AddURLResponse', {
@document_ns.route('/add_url')
class AddURL(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.expect(add_url_model)
@document_ns.response(201, 'Document added successfully', add_url_response)
@document_ns.response(400, 'Validation Error')
@document_ns.response(500, 'Internal Server Error')
def post(self):
"""
Add a new document from URL
Add a new document from URL. The URL in this case is stored and can be used to refresh the document.
As a consequence, this must be a permanent and accessible URL.
"""
tenant_id = get_jwt_identity()
current_app.logger.info(f'Adding document from URL for tenant {tenant_id}')
@@ -190,6 +308,7 @@ document_list_model = document_ns.model('DocumentList', {
@document_ns.route('/list')
class DocumentList(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.doc('list_documents')
@document_ns.marshal_list_with(document_list_model, envelope='documents')
def get(self):
@@ -210,6 +329,7 @@ edit_document_model = document_ns.model('EditDocument', {
@document_ns.route('/<int:document_id>')
class DocumentResource(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.doc('edit_document')
@document_ns.expect(edit_document_model)
@document_ns.response(200, 'Document updated successfully')
@@ -217,7 +337,7 @@ class DocumentResource(Resource):
@document_ns.response(404, 'Document not found')
@document_ns.response(500, 'Internal Server Error')
def put(self, document_id):
"""Edit a document"""
"""Edit a document. The content of the document will not be refreshed!"""
try:
current_app.logger.debug(f'Editing document {document_id}')
data = request.json
@@ -232,10 +352,12 @@ class DocumentResource(Resource):
return e.to_dict(), e.status_code
@jwt_required()
@requires_service('DOCAPI')
@document_ns.doc('refresh_document')
@document_ns.response(200, 'Document refreshed successfully')
def post(self, document_id):
"""Refresh a document"""
"""Refresh a document. In this case, the content of the document will be refreshed! This requires the document
version to have a permanent and accessible URL!"""
tenant_id = get_jwt_identity()
new_version, result = refresh_document(document_id, tenant_id)
if new_version:
@@ -253,6 +375,7 @@ edit_document_version_model = document_ns.model('EditDocumentVersion', {
@document_ns.route('/version/<int:version_id>')
class DocumentVersionResource(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.doc('edit_document_version')
@document_ns.expect(edit_document_version_model)
@document_ns.response(200, 'Document version updated successfully')
@@ -280,11 +403,13 @@ refresh_document_model = document_ns.model('RefreshDocument', {
@document_ns.route('/<int:document_id>/refresh')
class RefreshDocument(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.response(200, 'Document refreshed successfully')
@document_ns.response(404, 'Document not found')
def post(self, document_id):
"""
Refresh a document without additional information
Refresh a document without additional information. In this case, the content of the document will be refreshed!
This requires the document version to have a permanent and accessible URL!
"""
tenant_id = get_jwt_identity()
current_app.logger.info(f'Refreshing document {document_id} for tenant {tenant_id}')
@@ -310,13 +435,14 @@ class RefreshDocument(Resource):
@document_ns.route('/<int:document_id>/refresh_with_info')
class RefreshDocumentWithInfo(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.expect(refresh_document_model)
@document_ns.response(200, 'Document refreshed successfully')
@document_ns.response(400, 'Validation Error')
@document_ns.response(404, 'Document not found')
def post(self, document_id):
"""
Refresh a document with new information
Refresh a document with new version information.
"""
tenant_id = get_jwt_identity()
current_app.logger.info(f'Refreshing document {document_id} with info for tenant {tenant_id}')
@@ -338,3 +464,112 @@ class RefreshDocumentWithInfo(Resource):
except Exception as e:
current_app.logger.error(f'Error refreshing document with info: {str(e)}')
return {'message': 'Internal server error'}, 500
# Define models for lookup requests
lookup_model = document_ns.model('DocumentLookup', {
'lookup_criteria': fields.Raw(required=True,
description='JSON object containing key-value pairs to match in metadata. '
'Example: {"external_id": "123", "source": "zapier", "source_type": "google_docs"}'),
'metadata_type': fields.String(required=True, enum=['user_metadata', 'system_metadata'],
description='Which metadata field to search in')
})
lookup_response = document_ns.model('DocumentLookupResponse', {
'document_id': fields.Integer(description='ID of the found document'),
'document_version_id': fields.Integer(description='ID of the latest document version'),
'name': fields.String(description='Document name'),
'metadata': fields.Raw(description='Full metadata of the found document')
})
@document_ns.route('/lookup')
class DocumentLookup(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.expect(lookup_model)
@document_ns.marshal_with(lookup_response)
@document_ns.response(200, 'Document found', lookup_response)
@document_ns.response(404, 'No document found matching criteria')
def post(self):
"""
Look up a document using metadata criteria
"""
tenant_id = get_jwt_identity()
try:
data = request.json
document, version = lookup_document(
tenant_id,
data['lookup_criteria'],
data['metadata_type']
)
return {
'document_id': document.id,
'document_version_id': version.id,
'name': document.name,
'metadata': getattr(version, data['metadata_type'])
}
except EveAIException as e:
return e.to_dict(), e.status_code
except KeyError as e:
return {'message': f'Missing required field: {str(e)}'}, 400
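An illustrative request against this endpoint; the base URL and namespace prefix are assumptions, and the criteria echo the example in the model description:

import requests

resp = requests.post(
    "https://api.example.com/document/lookup",  # assumed prefix
    headers={"Authorization": "Bearer <jwt>"},  # token from the auth namespace
    json={
        "lookup_criteria": {"external_id": "123", "source": "zapier"},
        "metadata_type": "user_metadata",
    },
)
print(resp.status_code, resp.json())  # 200 with IDs and metadata, or 404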
refresh_content_model = document_ns.model('RefreshDocumentContent', {
'file_content': fields.Raw(required=True, description='The new file content'),
'language': fields.String(required=False, description='Language of the document'),
'user_context': fields.String(required=False, description='User context for the document'),
'user_metadata': fields.Raw(required=False, description='Custom metadata fields'),
'catalog_properties': fields.Raw(required=False, description='Catalog-specific properties'),
'trigger_service': fields.String(required=False, description='Service that triggered the update')
})
@document_ns.route('/<int:document_id>/refresh_content')
class RefreshDocumentContent(Resource):
@jwt_required()
@requires_service('DOCAPI')
@document_ns.expect(refresh_content_model)
@document_ns.response(200, 'Document refreshed successfully')
def post(self, document_id):
"""Refresh a document with new content"""
tenant_id = get_jwt_identity()
try:
data = request.json
file_content = data['file_content']
# Build user_metadata by merging:
# 1. Existing metadata (if any)
# 2. New metadata from request
# 3. Zapier-specific fields
user_metadata = data.get('user_metadata', {})
user_metadata.update({
'source': 'zapier',
'trigger_service': data.get('trigger_service')
})
data['user_metadata'] = user_metadata
# Keep catalog_properties separate
if 'catalog_properties' in data:
# We could add validation here against catalog configuration
data['catalog_properties'] = data['catalog_properties']
new_version, task_id = refresh_document_with_content(
document_id,
tenant_id,
file_content,
data
)
return {
'message': f'Document refreshed successfully. New version: {new_version.id}. Task ID: {task_id}',
'document_id': document_id,
'document_version_id': new_version.id,
'task_id': task_id
}, 200
except EveAIException as e:
return e.to_dict(), e.status_code

View File

@@ -26,7 +26,7 @@ When you change chunking of embedding information, you'll need to manually refre
{{ render_field(field, disabled_fields, exclude_fields) }}
{% endfor %}
{% endfor %}
<button type="submit" class="btn btn-primary">Save Retriever</button>
<button type="submit" class="btn btn-primary">Save Catalog</button>
</form>
{% endblock %}

View File

@@ -129,7 +129,7 @@
<li class="nav-item">
<a href="/session_defaults" class="btn btn-sm bg-gradient-primary mb-0">
{% if 'tenant' in session %}
TENANT: {{ session['tenant'].get('name', 'None') }}
TENANT {{ session['tenant'].get('id', 'None') }}: {{ session['tenant'].get('name', 'None') }}
{% endif %}
</a>
</li>

View File

@@ -130,6 +130,21 @@ document.addEventListener('DOMContentLoaded', function() {
});
});
</script>
<script>
document.addEventListener('DOMContentLoaded', function() {
document.querySelectorAll('textarea[data-handle-enter="true"]').forEach(function(textarea) {
textarea.addEventListener('keydown', function(e) {
if (e.key === 'Enter' && e.shiftKey) {
e.preventDefault();
const start = this.selectionStart;
const end = this.selectionEnd;
this.value = this.value.substring(0, start) + '\n' + this.value.substring(end);
this.selectionStart = this.selectionEnd = start + 1;
}
});
});
});
</script>
<style>

View File

@@ -24,9 +24,12 @@
<!-- Tenant Selection Form -->
<form method="POST" action="{{ url_for('user_bp.handle_tenant_selection') }}">
{{ render_selectable_table(headers=["Tenant ID", "Tenant Name", "Website", "Type"], rows=rows, selectable=True, id="tenantsTable") }}
<div class="form-group mt-3">
<button type="submit" name="action" value="select_tenant" class="btn btn-primary">Set Session Tenant</button>
<button type="submit" name="action" value="edit_tenant" class="btn btn-secondary">Edit Tenant</button>
<div class="form-group mt-3 d-flex justify-content-between">
<div>
<button type="submit" name="action" value="select_tenant" class="btn btn-primary">Set Session Tenant</button>
<button type="submit" name="action" value="edit_tenant" class="btn btn-secondary">Edit Tenant</button>
</div>
<button type="submit" name="action" value="new_tenant" class="btn btn-secondary">New Tenant</button>
</div>
</form>

View File

@@ -15,14 +15,6 @@ from config.type_defs.retriever_types import RETRIEVER_TYPES
from .dynamic_form_base import DynamicFormBase
def allowed_file(form, field):
if field.data:
filename = field.data.filename
allowed_extensions = current_app.config.get('SUPPORTED_FILE_TYPES', [])
if not ('.' in filename and filename.rsplit('.', 1)[1].lower() in allowed_extensions):
raise ValidationError('Unsupported file type.')
def validate_json(form, field):
if field.data:
try:
@@ -101,7 +93,10 @@ class ProcessorForm(FlaskForm):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Dynamically populate the 'type' field using the constructor
self.type.choices = [(key, value['name']) for key, value in PROCESSOR_TYPES.items()]
self.type.choices = sorted(
[(key, value['name']) for key, value in PROCESSOR_TYPES.items()],
key=lambda x: x[1],
)
class EditProcessorForm(DynamicFormBase):
@@ -177,7 +172,7 @@ class EditRetrieverForm(DynamicFormBase):
class AddDocumentForm(DynamicFormBase):
file = FileField('File', validators=[FileRequired(), allowed_file])
file = FileField('File', validators=[FileRequired()])
catalog = StringField('Catalog', render_kw={'readonly': True})
sub_file_type = StringField('Sub File Type', validators=[Optional(), Length(max=50)])
name = StringField('Name', validators=[Length(max=100)])

View File

@@ -14,7 +14,7 @@ import json
from common.models.document import Document, DocumentVersion, Catalog, Retriever, Processor
from common.extensions import db
from common.models.interaction import Specialist, SpecialistRetriever
from common.utils.document_utils import validate_file_type, create_document_stack, start_embedding_task, process_url, \
from common.utils.document_utils import create_document_stack, start_embedding_task, process_url, \
edit_document, \
edit_document_version, refresh_document
from common.utils.eveai_exceptions import EveAIInvalidLanguageException, EveAIUnsupportedFileType, \
@@ -391,9 +391,6 @@ def add_document():
sub_file_type = form.sub_file_type.data
filename = secure_filename(file.filename)
extension = filename.rsplit('.', 1)[1].lower()
validate_file_type(extension)
catalog_properties = {}
document_version_configurations = CATALOG_TYPES[catalog.type]['document_version_configurations']
for config in document_version_configurations:

View File

@@ -5,7 +5,46 @@ import json
from wtforms.fields.choices import SelectField
from wtforms.fields.datetime import DateField
from common.utils.config_field_types import TaggingFields
from common.utils.config_field_types import TaggingFields, json_to_patterns, patterns_to_json
class TaggingFieldsField(TextAreaField):
def __init__(self, *args, **kwargs):
kwargs['render_kw'] = {
'class': 'chunking-patterns-field',
'data-handle-enter': 'true'
}
super().__init__(*args, **kwargs)
# def _value(self):
# if self.data:
# return json.dumps(self.data)
# return ''
#
# def process_formdata(self, valuelist):
# if valuelist and valuelist[0]:
# try:
# self.data = json.loads(valuelist[0])
# except json.JSONDecodeError as e:
# raise ValueError('Not valid JSON content')
class ChunkingPatternsField(TextAreaField):
def __init__(self, *args, **kwargs):
kwargs['render_kw'] = {
'class': 'chunking-patterns-field',
'data-handle-enter': 'true'
}
super().__init__(*args, **kwargs)
# def _value(self):
# if self.data:
# return '\n'.join(self.data)
# return ''
#
# def process_formdata(self, valuelist):
# if valuelist and valuelist[0]:
# self.data = [line.strip() for line in valuelist[0].split('\n') if line.strip()]
class DynamicFormBase(FlaskForm):
@@ -80,7 +119,7 @@ class DynamicFormBase(FlaskForm):
# Handle special case for tagging_fields
if field_type == 'tagging_fields':
field_class = TextAreaField
field_class = TaggingFieldsField
extra_classes = 'json-editor'
field_kwargs = {}
elif field_type == 'enum':
@@ -89,6 +128,10 @@ class DynamicFormBase(FlaskForm):
choices = [(str(val), str(val)) for val in allowed_values]
extra_classes = ''
field_kwargs = {'choices': choices}
elif field_type == 'chunking_patterns':
field_class = ChunkingPatternsField
extra_classes = ['monospace-text', 'pattern-input']
field_kwargs = {}
else:
extra_classes = ''
field_class = {
@@ -111,6 +154,12 @@ class DynamicFormBase(FlaskForm):
except (TypeError, ValueError) as e:
current_app.logger.error(f"Error converting initial data to JSON: {e}")
field_data = "{}"
elif field_type == 'chunking_patterns':
try:
field_data = json_to_patterns(field_data)
except (TypeError, ValueError) as e:
current_app.logger.error(f"Error converting initial data to a list of patterns: {e}")
field_data = ""
elif default is not None:
field_data = default
@@ -173,12 +222,17 @@ class DynamicFormBase(FlaskForm):
original_field_name = full_field_name[prefix_length:]
field = getattr(self, full_field_name)
# Parse JSON for tagging_fields type
if isinstance(field, TextAreaField) and field.data:
if isinstance(field, TaggingFieldsField) and field.data:
try:
data[original_field_name] = json.loads(field.data)
except json.JSONDecodeError:
# Validation should catch this, but just in case
data[original_field_name] = field.data
elif isinstance(field, ChunkingPatternsField):
try:
data[original_field_name] = patterns_to_json(field.data)
except Exception as e:
current_app.logger.error(f"Error converting initial data to patterns: {e}")
else:
data[original_field_name] = field.data
return data
@@ -230,5 +284,3 @@ def validate_tagging_fields(form, field):
except (TypeError, ValueError) as e:
raise ValidationError(f"Invalid field definition: {str(e)}")

View File

@@ -243,6 +243,8 @@ def handle_tenant_selection():
return redirect(prefixed_url_for('user_bp.edit_tenant', tenant_id=tenant_id))
case 'select_tenant':
return redirect(prefixed_url_for('user_bp.tenant_overview'))
case 'new_tenant':
return redirect(prefixed_url_for('user_bp.tenant'))
# Add more conditions for other actions
return redirect(prefixed_url_for('select_tenant'))

View File

@@ -1,12 +1,14 @@
import logging
import logging.config
from flask import Flask, jsonify
from flask import Flask, jsonify, request
import os
from flask_jwt_extended import verify_jwt_in_request, get_jwt_identity
from common.extensions import db, socketio, jwt, cors, session, simple_encryption, metrics
from config.logging_config import LOGGING
from eveai_chat.socket_handlers import chat_handler
from common.utils.cors_utils import create_cors_after_request
from common.utils.cors_utils import create_cors_after_request, get_allowed_origins
from common.utils.celery_utils import make_celery, init_celery
from config.config import get_config
@@ -32,6 +34,32 @@ def create_app(config_file=None):
app.celery = make_celery(app.name, app.config)
init_celery(app.celery, app)
@app.before_request
def check_cors():
app.logger.debug('Checking CORS')
if request.method == 'OPTIONS':
app.logger.debug("Handling OPTIONS request")
return '', 200 # Allow OPTIONS to pass through
origin = request.headers.get('Origin')
if not origin:
return # Not a CORS request
# Get tenant ID from request
if verify_jwt_in_request():
tenant_id = get_jwt_identity()
if not tenant_id:
return
else:
return
# Check if origin is allowed for this tenant
allowed_origins = get_allowed_origins(tenant_id)
if origin not in allowed_origins:
app.logger.warning(f'Origin {origin} not allowed for tenant {tenant_id}')
return {'error': 'Origin not allowed'}, 403
app.logger.info("EveAI Chat Server Started Successfully")
app.logger.info("-------------------------------------------------------------------------------------------------")
return app
@@ -54,8 +82,17 @@ def register_extensions(app):
metrics.init_app(app)
# Cors setup
cors.init_app(app, resources={r"/chat/*": {"origins": "*"}})
app.after_request(create_cors_after_request('/chat'))
cors.init_app(app, resources={
r"/*": { # Make sure this matches your setup
"origins": "*",
"methods": ["GET", "POST", "PUT", "OPTIONS"],
"allow_headers": ["Content-Type", "Authorization", "X-Requested-With"],
"expose_headers": ["Content-Length", "Content-Range"],
"supports_credentials": True,
"max_age": 1728000,
"allow_credentials": True
}
})
session.init_app(app)

View File

@@ -256,7 +256,7 @@ class RAGSpecialist(BaseSpecialist):
structured_llm = llm.with_structured_output(output_schema)
chain = setup_and_retrieval | rag_prompt | structured_llm
raw_result = chain.invoke(query)
raw_result = chain.invoke(detailed_question)
result = SpecialistResult.create_for_type(
"STANDARD_RAG",
detailed_query=detailed_question,

View File

@@ -1,5 +1,5 @@
# Import all processor implementations to ensure registration
from . import audio_processor, html_processor, pdf_processor
from . import audio_processor, html_processor, pdf_processor, markdown_processor, docx_processor
# List of all available processor implementations
__all__ = ['audio_processor', 'html_processor', 'pdf_processor']
__all__ = ['audio_processor', 'html_processor', 'pdf_processor', 'markdown_processor', 'docx_processor']

View File

@@ -46,7 +46,7 @@ class AudioProcessor(TranscriptionBaseProcessor):
try:
audio_info = AudioSegment.from_file(temp_file_path, format=self.document_version.file_type)
total_duration = len(audio_info)
self._log_tuning("_compress_audio", {
self.log_tuning("_compress_audio", {
"Audio Duration (ms)": total_duration,
})
segment_length = self.max_compression_duration * 1000 # Convert to milliseconds
@@ -55,7 +55,7 @@ class AudioProcessor(TranscriptionBaseProcessor):
compressed_segments = AudioSegment.empty()
for i in range(total_chunks):
self._log_tuning("_compress_audio", {
self.log_tuning("_compress_audio", {
"Segment Nr": f"{i + 1} of {total_chunks}"
})
@@ -87,7 +87,7 @@ class AudioProcessor(TranscriptionBaseProcessor):
compressed_filename,
compressed_buffer.read()
)
self._log_tuning("_compress_audio", {
self.log_tuning("_compress_audio", {
"Compressed audio to MinIO": compressed_filename
})
@@ -172,14 +172,14 @@ class AudioProcessor(TranscriptionBaseProcessor):
transcriptions.append(trans)
self._log_tuning("_transcribe_audio", {
self.log_tuning("_transcribe_audio", {
"Chunk Nr": f"{i + 1} of {total_chunks}",
"Segment Duration": segment_duration,
"Transcription": trans,
})
else:
self._log("Warning: Received empty transcription", level='warning')
self._log_tuning("_transcribe_audio", {"ERROR": "No transcription"})
self.log_tuning("_transcribe_audio", {"ERROR": "No transcription"})
except Exception as e:
self._log(f"Error during transcription: {str(e)}", level='error')
@@ -202,7 +202,7 @@ class AudioProcessor(TranscriptionBaseProcessor):
transcription_filename,
full_transcription.encode('utf-8')
)
self._log_tuning(f"Saved transcription to MinIO: {transcription_filename}")
self.log_tuning(f"Saved transcription to MinIO: {transcription_filename}")
return full_transcription

View File

@@ -17,7 +17,7 @@ class BaseProcessor(ABC):
self.tuning_logger = None
self._setup_tuning_logger()
self._log_tuning("Processor initialized", {
self.log_tuning("Processor initialized", {
"processor_type": processor.type if processor else None,
"document_version": document_version.id if document_version else None,
"catalog": catalog.id if catalog else None
@@ -42,6 +42,10 @@ class BaseProcessor(ABC):
def process(self):
pass
@property
def configuration(self):
return self.processor.configuration
def _save_markdown(self, markdown):
markdown_filename = f"{self.document_version.id}.md"
minio_client.upload_document_file(
@@ -78,7 +82,7 @@ class BaseProcessor(ABC):
return markdown
def _log_tuning(self, message: str, data: Dict[str, Any] = None) -> None:
def log_tuning(self, message: str, data: Dict[str, Any] = None) -> None:
if self.tuning and self.tuning_logger:
try:
self.tuning_logger.log_tuning('processor', message, data)

View File

@@ -0,0 +1,129 @@
import docx
import io
from .base_processor import BaseProcessor
from .processor_registry import ProcessorRegistry
from common.extensions import minio_client
import re
class DocxProcessor(BaseProcessor):
def __init__(self, tenant, model_variables, document_version, catalog, processor):
super().__init__(tenant, model_variables, document_version, catalog, processor)
self.config = processor.configuration
self.extract_comments = self.config.get('extract_comments', False)
self.extract_headers_footers = self.config.get('extract_headers_footers', False)
self.preserve_formatting = self.config.get('preserve_formatting', True)
self.list_style = self.config.get('list_style', 'dash')
self.image_handling = self.config.get('image_handling', 'skip')
self.table_alignment = self.config.get('table_alignment', 'left')
def process(self):
try:
file_data = minio_client.download_document_file(
self.tenant.id,
self.document_version.bucket_name,
self.document_version.object_name,
)
doc = docx.Document(io.BytesIO(file_data))
markdown = self._convert_to_markdown(doc)
title = self._extract_title(doc)
self._save_markdown(markdown)
return markdown, title
except Exception as e:
self._log(f"Error processing DOCX: {str(e)}", level='error')
raise
def _convert_to_markdown(self, doc):
markdown_parts = []
if self.extract_headers_footers:
for section in doc.sections:
if section.header.paragraphs:
markdown_parts.extend(self._process_paragraphs(section.header.paragraphs))
markdown_parts.extend(self._process_paragraphs(doc.paragraphs))
if self.extract_comments and doc.comments:
markdown_parts.append("\n## Comments\n")
for comment in doc.comments:
markdown_parts.append(f"> {comment.text}\n")
return "\n".join(markdown_parts)
def _process_paragraphs(self, paragraphs):
markdown_parts = []
in_list = False
for para in paragraphs:
if not para.text.strip():
continue
style = para.style.name.lower()
if 'heading' in style:
level = int(style[-1]) if style[-1].isdigit() else 1
markdown_parts.append(f"{'#' * level} {para.text}\n")
elif para._p.pPr and para._p.pPr.numPr: # List item
marker = self._get_list_marker()
markdown_parts.append(f"{marker} {para.text}\n")
in_list = True
else:
if in_list:
markdown_parts.append("\n")
in_list = False
text = para.text
if self.preserve_formatting:
text = self._apply_formatting(para)
markdown_parts.append(f"{text}\n")
return markdown_parts
def _get_list_marker(self):
return {
'dash': '-',
'asterisk': '*',
'plus': '+'
}.get(self.list_style, '-')
def _apply_formatting(self, paragraph):
text = paragraph.text
if not text:
return ""
runs = paragraph.runs
formatted_parts = []
for run in runs:
part = run.text
if run.bold:
part = f"**{part}**"
if run.italic:
part = f"*{part}*"
if run.underline:
part = f"__{part}__"
formatted_parts.append(part)
return "".join(formatted_parts)
def _extract_title(self, doc):
if doc.paragraphs:
first_para = doc.paragraphs[0]
if 'heading' in first_para.style.name.lower():
return first_para.text.strip()
# Look for first Heading 1 in document
for para in doc.paragraphs:
if para.style.name.lower() == 'heading 1':
return para.text.strip()
return "Untitled Document"
ProcessorRegistry.register("DOCX_PROCESSOR", DocxProcessor)

View File

@@ -24,7 +24,7 @@ class HTMLProcessor(BaseProcessor):
# Add verification logging
self._log(f"HTML Processor initialized with tuning={self.tuning}")
if self.tuning:
self._log_tuning("HTML Processor initialized", {
self.log_tuning("HTML Processor initialized", {
"html_tags": self.html_tags,
"html_end_tags": self.html_end_tags,
"included_elements": self.html_included_elements,
@@ -75,7 +75,7 @@ class HTMLProcessor(BaseProcessor):
title = soup.find('title').get_text(strip=True) if soup.find('title') else ''
self._log(f'Finished parsing HTML for tenant {self.tenant.id}')
self._log_tuning("_parse_html", {"extracted_html": extracted_html, "title": title})
self.log_tuning("_parse_html", {"extracted_html": extracted_html, "title": title})
return extracted_html, title
def _generate_markdown_from_html(self, html_content):
@@ -96,7 +96,7 @@ class HTMLProcessor(BaseProcessor):
input_html = {"html": chunk}
markdown_chunk = chain.invoke(input_html)
markdown_chunks.append(markdown_chunk)
self._log_tuning("_generate_markdown_from_html", {"chunk": chunk, "markdown_chunk": markdown_chunk})
self.log_tuning("_generate_markdown_from_html", {"chunk": chunk, "markdown_chunk": markdown_chunk})
markdown = "\n\n".join(markdown_chunks)
self._log(f'Finished generating markdown from HTML for tenant {self.tenant.id}')

View File

@@ -0,0 +1,48 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
import re
from langchain_core.runnables import RunnablePassthrough
from common.extensions import minio_client
from common.utils.model_utils import create_language_template
from .base_processor import BaseProcessor
from common.utils.business_event_context import current_event
from .processor_registry import ProcessorRegistry
def _find_first_h1(markdown: str) -> str:
# Look for # Header (allowing spaces after #)
match = re.search(r'^#\s+(.+)$', markdown, re.MULTILINE)
return match.group(1).strip() if match else ""
class MarkdownProcessor(BaseProcessor):
def __init__(self, tenant, model_variables, document_version, catalog, processor):
super().__init__(tenant, model_variables, document_version, catalog, processor)
self.chunk_size = catalog.max_chunk_size
self.chunk_overlap = 0
self.tuning = self.processor.tuning
def process(self):
self._log("Starting Markdown processing")
try:
file_data = minio_client.download_document_file(
self.tenant.id,
self.document_version.bucket_name,
self.document_version.object_name,
)
markdown = file_data.decode('utf-8')
title = _find_first_h1(markdown)
self._save_markdown(markdown)
self._log("Finished processing Markdown")
return markdown, title
except Exception as e:
self._log(f"Error processing Markdown: {str(e)}", level='error')
raise
ProcessorRegistry.register("MARKDOWN_PROCESSOR", MarkdownProcessor)

View File

@@ -57,7 +57,7 @@ class PDFProcessor(BaseProcessor):
'figures': self._extract_figures(page, page_num, figure_counter),
'tables': self._extract_tables(page)
}
self._log_tuning("_extract_content", {"page_num": page_num, "page_content": page_content})
self.log_tuning("_extract_content", {"page_num": page_num, "page_content": page_content})
figure_counter += len(page_content['figures'])
extracted_content.append(page_content)
@@ -119,7 +119,7 @@ class PDFProcessor(BaseProcessor):
markdown_table = self._table_to_markdown(table)
if markdown_table: # Only add non-empty tables
tables.append(markdown_table)
self._log_tuning("_extract_tables", {"markdown_table": markdown_table})
self.log_tuning("_extract_tables", {"markdown_table": markdown_table})
except Exception as e:
self._log(f"Error extracting tables from page: {str(e)}", level='error')
return tables

View File

@@ -45,7 +45,7 @@ class TranscriptionBaseProcessor(BaseProcessor):
return text_splitter.split_text(transcription)
def _process_chunks(self, chunks):
self._log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
self.log_tuning("_process_chunks", {"Nr of Chunks": len(chunks)})
llm = self.model_variables.get_llm()
template = self.model_variables.get_template('transcript')
language_template = create_language_template(template, self.document_version.language)
@@ -64,7 +64,7 @@ class TranscriptionBaseProcessor(BaseProcessor):
}
markdown = chain.invoke(input_transcript)
markdown = self._clean_markdown(markdown)
self._log_tuning("_process_chunks", {
self.log_tuning("_process_chunks", {
"Chunk Number": f"{i + 1} of {len(chunks)}",
"Chunk": chunk,
"Previous Chunk": previous_part,

View File

@@ -1,3 +1,4 @@
import re
from datetime import datetime as dt, timezone as tz
from celery import states
@@ -23,6 +24,8 @@ from common.utils.business_event_context import current_event
from config.type_defs.processor_types import PROCESSOR_TYPES
from eveai_workers.processors.processor_registry import ProcessorRegistry
from common.utils.config_field_types import json_to_pattern_list
# Healthcheck task
@current_celery.task(name='ping', queue='embeddings')
@@ -99,9 +102,13 @@ def create_embeddings(tenant_id, document_version_id):
processor=processor
)
markdown, title = document_processor.process()
document_processor.log_tuning("Processor returned: ", {
'markdown': markdown,
'title': title
})
with current_event.create_span("Embedding"):
embed_markdown(tenant, model_variables, document_version, catalog, markdown, title)
embed_markdown(tenant, model_variables, document_version, catalog, document_processor, markdown, title)
current_event.log("Finished Embedding Creation Task")
@@ -129,16 +136,19 @@ def delete_embeddings_for_document_version(document_version):
raise
def embed_markdown(tenant, model_variables, document_version, catalog, markdown, title):
def embed_markdown(tenant, model_variables, document_version, catalog, processor, markdown, title):
# Create potential chunks
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, f"{document_version.id}.md")
potential_chunks = create_potential_chunks_for_markdown(tenant.id, document_version, processor, markdown)
processor.log_tuning("Potential Chunks: ", {'potential chunks': potential_chunks})
# Combine chunks for embedding
chunks = combine_chunks_for_markdown(potential_chunks, catalog.min_chunk_size, catalog.max_chunk_size)
chunks = combine_chunks_for_markdown(potential_chunks, catalog.min_chunk_size, catalog.max_chunk_size, processor)
processor.log_tuning("Chunks: ", {'chunks': chunks})
# Enrich chunks
with current_event.create_span("Enrich Chunks"):
enriched_chunks = enrich_chunks(tenant, model_variables, document_version, title, chunks)
processor.log_tuning("Enriched Chunks: ", {'enriched_chunks': enriched_chunks})
# Create embeddings
with current_event.create_span("Create Embeddings"):
@@ -238,23 +248,17 @@ def embed_chunks(tenant, model_variables, document_version, chunks):
return new_embeddings
def create_potential_chunks_for_markdown(tenant_id, document_version, input_file):
def create_potential_chunks_for_markdown(tenant_id, document_version, processor, markdown):
try:
current_app.logger.info(f'Creating potential chunks for tenant {tenant_id}')
markdown_on = document_version.object_name.rsplit('.', 1)[0] + '.md'
# Download the markdown file from MinIO
markdown_data = minio_client.download_document_file(tenant_id,
document_version.bucket_name,
markdown_on,
)
markdown = markdown_data.decode('utf-8')
heading_level = processor.configuration.get('chunking_heading_level', 2)
headers_to_split_on = [
("#", "Header 1"),
("##", "Header 2"),
(f"{'#' * i}", f"Header {i}") for i in range(1, min(heading_level + 1, 7))
]
processor.log_tuning('Headers to split on', {'header list: ': headers_to_split_on})
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on, strip_headers=False)
md_header_splits = markdown_splitter.split_text(markdown)
potential_chunks = [doc.page_content for doc in md_header_splits]
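For illustration, with the default `chunking_heading_level` of 2 the comprehension above reproduces the old hard-coded list; a configured level of 3 yields:
```python
# chunking_heading_level = 3
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]
```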
@@ -265,14 +269,61 @@ def create_potential_chunks_for_markdown(tenant_id, document_version, input_file
raise
def combine_chunks_for_markdown(potential_chunks, min_chars, max_chars):
def combine_chunks_for_markdown(potential_chunks, min_chars, max_chars, processor):
actual_chunks = []
current_chunk = ""
current_length = 0
def matches_chunking_pattern(text, patterns):
if not patterns:
return False
# Get the first line of the text
first_line = text.split('\n', 1)[0].strip()
# Check if it's a header at appropriate level
header_match = re.match(r'^(#{1,6})\s+(.+)$', first_line)
if not header_match:
return False
# Get the heading level (number of #s)
header_level = len(header_match.group(1))
# Get the header text
header_text = header_match.group(2)
# Check if header matches any pattern
for pattern in patterns:
try:
processor.log_tuning('Pattern check: ', {
'pattern: ': pattern,
'text': header_text
})
if re.search(pattern, header_text, re.IGNORECASE):
return True
except Exception as e:
current_app.logger.warning(f"Invalid regex pattern '{pattern}': {str(e)}")
continue
return False
chunking_patterns = json_to_pattern_list(processor.configuration.get('chunking_patterns', '[]'))
processor.log_tuning('Chunking Patterns Extraction: ', {
'Full Configuration': processor.configuration,
'Chunking Patterns': chunking_patterns,
})
for chunk in potential_chunks:
chunk_length = len(chunk)
# Force new chunk if pattern matches
if chunking_patterns and matches_chunking_pattern(chunk, chunking_patterns):
if current_chunk and current_length >= min_chars:
actual_chunks.append(current_chunk)
current_chunk = chunk
current_length = chunk_length
continue
if current_length + chunk_length > max_chars:
if current_length >= min_chars:
actual_chunks.append(current_chunk)
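The hunk is truncated here; a self-contained sketch of the complete combining loop, assuming the cut-off remainder follows the obvious pattern (reset the buffer after flushing, flush any final buffer on exit):
```python
import re

def combine_chunks(potential_chunks, min_chars, max_chars, patterns=None):
    """Greedily merge header-split chunks into roughly min/max-sized pieces.

    A chunk whose first line is a heading matching one of `patterns`
    forces a new output chunk, mirroring matches_chunking_pattern above.
    """
    def starts_new_section(text):
        if not patterns:
            return False
        first_line = text.split('\n', 1)[0].strip()
        m = re.match(r'^(#{1,6})\s+(.+)$', first_line)
        return bool(m) and any(
            re.search(p, m.group(2), re.IGNORECASE) for p in patterns
        )

    actual_chunks, current = [], ""
    for chunk in potential_chunks:
        if starts_new_section(chunk):
            # As in the diff, a buffer shorter than min_chars is
            # overwritten (not flushed) when a pattern forces a split.
            if current and len(current) >= min_chars:
                actual_chunks.append(current)
            current = chunk
            continue
        if len(current) + len(chunk) > max_chars:
            if len(current) >= min_chars:
                actual_chunks.append(current)
            current = chunk  # assumed reset; the original is cut off here
        else:
            current = f"{current}\n{chunk}" if current else chunk
    if current:
        actual_chunks.append(current)
    return actual_chunks
```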

View File

@@ -0,0 +1,84 @@
#!/bin/bash
# Define the plugin names and paths
PLUGINS=("eveai-chat" "eveai_sync")
RELEASE_PATH="/Volumes/OWC4M2_1/Dropbox/Professioneel/AskEveAI/Production Plugins/WordPress"
# Helper function: Print usage
print_usage() {
echo "Usage: $0 [-b | -r] [plugin_name...]"
echo " -b Build only"
echo " -r Release only"
echo " If no options are provided, both build and release will be performed."
echo " Specify plugin names to target specific plugins. Default is all."
}
# Build function
build_plugin() {
local plugin=$1
local zip_file="${plugin}.zip"
echo "Building plugin: $plugin"
if [ -f "$zip_file" ]; then
echo "Removing existing zip file: $zip_file"
rm "$zip_file"
fi
if [ -d "$plugin" ]; then
zip -r "$zip_file" "$plugin" > /dev/null
echo "Plugin $plugin built successfully: $zip_file"
else
echo "Error: Plugin folder $plugin does not exist. Skipping."
fi
}
# Release function
release_plugin() {
local plugin=$1
local zip_file="${plugin}.zip"
local destination="$RELEASE_PATH/$zip_file"
echo "Releasing plugin: $plugin"
if [ -f "$zip_file" ]; then
cp "$zip_file" "$RELEASE_PATH"
echo "Plugin $plugin released successfully to $destination"
else
echo "Error: Build file $zip_file not found. Please build the plugin first."
fi
}
# Parse arguments
BUILD=false
RELEASE=false
TARGET_PLUGINS=("${PLUGINS[@]}")
while getopts "brh" opt; do
case "$opt" in
b) BUILD=true ;;
r) RELEASE=true ;;
h) print_usage; exit 0 ;;
*) print_usage; exit 1 ;;
esac
done
shift $((OPTIND-1))
# Handle plugin name filtering
if [ "$#" -gt 0 ]; then
TARGET_PLUGINS=("$@")
fi
# Default action: both build and release
if ! $BUILD && ! $RELEASE; then
BUILD=true
RELEASE=true
fi
# Execute actions
for plugin in "${TARGET_PLUGINS[@]}"; do
if $BUILD; then
build_plugin "$plugin"
fi
if $RELEASE; then
release_plugin "$plugin"
fi
done

View File

@@ -40,10 +40,11 @@ class EveAIChatWidget extends HTMLElement {
console.log('Chat Widget Connected');
this.innerHTML = this.getTemplate();
this.setupElements()
this.populateLanguageDropdown()
this.addEventListeners()
if (this.areAllAttributesSet()) {
console.log('All attributes are set, populating language dropdown');
this.populateLanguageDropdown()
console.log('All attributes are set, initializing socket');
this.initializeSocket();
} else {
@@ -194,7 +195,7 @@ class EveAIChatWidget extends HTMLElement {
}
this.socket = io(this.serverUrl, {
path: '/socket.io/',
path: '/chat/socket.io/',
transports: ['websocket'],
query: { // Change from auth to query
token: this.sessionToken
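With the widget now connecting through the `/chat/` proxy prefix (matched by the nginx change further down in this diff), a quick connectivity check from Python, assuming the `python-socketio` client package and a valid session token:
```python
import socketio  # pip install "python-socketio[client]"

sio = socketio.Client()

@sio.event
def connect():
    print("Connected through the /chat/ proxy")

# The token travels as a query parameter, mirroring the widget above.
# Host and token are placeholders.
sio.connect(
    "https://evie.askeveai.com?token=SESSION_TOKEN",
    socketio_path="/chat/socket.io/",
    transports=["websocket"],
)
sio.wait()
```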

View File

@@ -1,9 +1,9 @@
<?php
/**
* Plugin Name: EveAI Chat Widget
* Description: Integrates the EveAI chat interface into your WordPress site.
* Version: 2.0.16
* Author: Your Company
* Description: Integrates the Ask Eve AI (Evie) chat interface into your WordPress site.
* Version: 2.0.18
* Author: AskEveAI.com
* Text Domain: eveai-chat
* Domain Path: /languages
* Requires at least: 5.8
@@ -15,7 +15,7 @@ if (!defined('WPINC')) {
}
// Define plugin constants
define('EVEAI_CHAT_VERSION', '2.0.16');
define('EVEAI_CHAT_VERSION', '2.0.18');
define('EVEAI_CHAT_PLUGIN_DIR', plugin_dir_path(__FILE__));
define('EVEAI_CHAT_PLUGIN_URL', plugin_dir_url(__FILE__));

View File

@@ -77,6 +77,7 @@ class Assets implements Loadable {
}
private function load_assets() {
error_log('Starting to load EveAI assets');
// Enqueue all required scripts
wp_enqueue_script('socket-io');
wp_enqueue_script('marked');
@@ -93,6 +94,7 @@ class Assets implements Loadable {
'nonce' => wp_create_nonce('wp_rest'),
'settings' => $this->get_public_settings()
]);
error_log('EveAI assets loaded');
}
private function get_public_settings() {

View File

@@ -6,10 +6,10 @@ class RESTController implements Loadable {
public function init() {
add_action('rest_api_init', [$this, 'register_routes']);
error_log('REST routes registered for EveAI Chat');
}
public function register_routes() {
error_log('Attempting to register EveAI REST routes');
register_rest_route(
self::API_NAMESPACE,
'/token',
@@ -39,6 +39,7 @@ class RESTController implements Loadable {
'permission_callback' => [$this, 'verify_request'],
]
);
error_log('EveAI REST routes Registered');
}
public function verify_request(\WP_REST_Request $request): bool {

View File

@@ -117,8 +117,7 @@ class EveAI_API {
throw new Exception("API error ({$error_type}): {$error_message}");
}
return $response_data
// return $body;
return $body;
} catch (Exception $e) {
error_log("EveAI API Exception: " . $e->getMessage());
throw $e;

View File

@@ -0,0 +1,4 @@
{
"id": 216725,
"key": "App216725"
}

View File

@@ -0,0 +1,24 @@
# eveai_integration
This Zapier integration project was generated by the `zapier init` CLI command.
These are the typical next steps:
```bash
# Install dependencies
npm install # or you can use yarn
# Run tests
zapier test
# Register the integration on Zapier if you haven't
zapier register "App Title"
# Or you can link to an existing integration on Zapier
zapier link
# Push it to Zapier
zapier push
```
Find out more in the latest docs: https://github.com/zapier/zapier-platform/blob/main/packages/cli/README.md.

View File

@@ -0,0 +1,93 @@
// api_client.js
const BASE_URL = 'https://evie.askeveai.com/api/api/v1';
class EveAIApiClient {
constructor(z, bundle) {
this.z = z;
this.bundle = bundle;
}
async ensure_valid_token() {
const currentTime = Math.floor(Date.now() / 1000); // Current time in seconds
const token = this.bundle.authData.access_token;
const tokenExpiry = this.bundle.authData.token_expiry;
// Check if token is expired or will expire in next 30 seconds
if (!token || !tokenExpiry || currentTime + 30 >= tokenExpiry) {
this.z.console.log('Token missing or expiring soon, requesting new token...');
const response = await this.z.request({
url: `${BASE_URL}/auth/token`,
method: 'POST',
body: {
tenant_id: this.bundle.authData.tenant_id,
api_key: this.bundle.authData.api_key,
},
});
if (response.status !== 200) {
throw new Error(`Failed to get access token: ${response.status}`);
}
const data = response.json;
// Update the bundle's authData
this.bundle.authData.access_token = data.access_token;
this.bundle.authData.token_expiry = currentTime + data.expires_in;
this.z.console.log('New token obtained:', {
token_prefix: data.access_token.substring(0, 10) + '...',
expires_in: data.expires_in,
expiry_time: this.bundle.authData.token_expiry
});
return data.access_token;
}
this.z.console.log('Using existing valid token');
return token;
}
async make_request(method, endpoint, data = null) {
try {
// Ensure we have a valid token
const token = await this.ensure_valid_token();
this.z.console.log('Making request:', {
method,
endpoint,
token_prefix: token.substring(0, 10) + '...'
});
const requestConfig = {
url: `${BASE_URL}${endpoint}`,
method,
headers: {
'Authorization': `Bearer ${token}`,
'Content-Type': 'application/json',
},
};
if (data) {
requestConfig.body = data;
}
const response = await this.z.request(requestConfig);
this.z.console.log('Response received:', {
status: response.status,
data: response.json
});
return response.json;
} catch (error) {
this.z.console.error('Request failed:', {
error: error.message,
response: error.response
});
throw error;
}
}
}
module.exports = EveAIApiClient;

View File

@@ -0,0 +1,140 @@
const EveAIApiClient = require('./api_client');
const handleError = (z, error) => {
// Log the full error for debugging
z.console.error('Authentication error:', {
message: error.message,
response: error.response ? {
status: error.response.status,
data: error.response.data,
headers: error.response.headers
} : 'No response',
stack: error.stack
});
// If we have a response with a message from the API, use it
if (error.response) {
if (error.response.status) {
switch (error.response.status) {
case 400:
throw new Error('Invalid request. Please verify your Tenant ID and API Key format.');
case 401:
throw new Error('Authentication failed. Please verify your credentials and ensure your API Key is active.');
case 403:
throw new Error('Access forbidden. Your API Key may not have the required permissions.');
case 404:
throw new Error('Authentication service not found. Please verify the API URL.');
case 429:
throw new Error('Too many authentication attempts. Please try again later.');
case 500:
throw new Error('EveAI server encountered an error. Please try again later.');
default:
throw new Error(`Unexpected error (${error.response.status}). Please contact support if this persists.`);
}
}
// If we have error data but no status, try to use the error message
if (error.response.data) {
if (error.response.data.message) {
throw new Error(`API Error: ${error.response.data.message}`);
}
if (typeof error.response.data === 'string') {
throw new Error(`API Error: ${error.response.data}`);
}
}
}
// Handle network errors
if (error.message.includes('ECONNREFUSED')) {
throw new Error('Unable to connect to EveAI. Please check your network connection.');
}
if (error.message.includes('ETIMEDOUT')) {
throw new Error('Connection to EveAI timed out. Please try again.');
}
// Generic error for unhandled cases
throw new Error(`Authentication failed: ${error.message}`);
};
const testAuth = async (z, bundle) => {
try {
const client = new EveAIApiClient(z, bundle);
await client.ensure_valid_token();
// Make a test request to verify the token works
const response = await client.make_request('GET', '/auth/verify');
// Log successful authentication
z.console.log('Authentication successful', {
tenant_id: bundle.authData.tenant_id,
token_prefix: bundle.authData.access_token.substring(0, 10) + '...',
expiry: new Date(bundle.authData.token_expiry * 1000).toISOString()
});
return response;
} catch (error) {
// Use our enhanced error handler
handleError(z, error);
}
};
module.exports = {
type: 'session',
fields: [
{
helpText:
"The tenant_id provided to you by email, or to be created / retrieved in Evie's administrative interface (https://evie.askeveai.com/admin)",
computed: false,
key: 'tenant_id',
required: true,
label: 'Tenant ID',
type: 'string',
},
{
helpText:
"The api_key provided to you by email, or to be created / retrieved in Evie's administrative interface (https://evie.askeveai.com/admin)",
computed: false,
key: 'api_key',
required: true,
label: 'API Key',
type: 'password',
},
],
sessionConfig: {
perform: async (z, bundle) => {
try {
const client = new EveAIApiClient(z, bundle);
const token = await client.ensure_valid_token();
z.console.log('Session token obtained', {
token_prefix: token.substring(0, 10) + '...',
expiry: new Date(bundle.authData.token_expiry * 1000).toISOString()
});
return {
access_token: token,
token_expiry: bundle.authData.token_expiry
};
} catch (error) {
handleError(z, error);
}
}
},
test: testAuth,
};
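For reference, the same token exchange outside Zapier; a minimal Python sketch, assuming the response shape used by `ensure_valid_token` above (`access_token` plus `expires_in` in seconds):
```python
import time
import requests

BASE_URL = "https://evie.askeveai.com/api/api/v1"

def get_token(tenant_id: str, api_key: str) -> dict:
    """Exchange tenant credentials for a bearer token (illustrative sketch)."""
    resp = requests.post(
        f"{BASE_URL}/auth/token",
        json={"tenant_id": tenant_id, "api_key": api_key},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()
    return {
        "access_token": data["access_token"],
        "token_expiry": int(time.time()) + data["expires_in"],
    }
```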

View File

@@ -0,0 +1,262 @@
const EveAIApiClient = require('../api_client');
module.exports = {
display: {
description: "This action uploads a new document to Evie's Library",
hidden: false,
label: 'Add a new document to Evie',
},
key: 'add_document',
noun: 'Document',
operation: {
inputFields: [
{
key: 'catalog_id',
label: 'Catalog ID',
type: 'integer',
helpText:
"The ID of the Catalog in Evie's Library you want to add the document to.",
required: true,
list: false,
altersDynamicFields: false,
},
{
key: 'file',
label: 'The File Content',
type: 'file',
helpText:
"The content of the file that needs to uploaded and indexed in Evie's Library",
required: true,
list: false,
altersDynamicFields: false,
},
{
key: 'name',
label: 'Document Name',
type: 'string',
helpText: 'The name you want to give the Document.',
required: true,
list: false,
altersDynamicFields: false,
},
{
key: 'language',
label: 'Document Language',
type: 'string',
default: 'en',
helpText: 'Two-letter code of the language the document is written in.',
required: true,
list: false,
altersDynamicFields: false,
},
{
key: 'user_context',
label: 'User Context',
type: 'text',
helpText:
'Contextual information you want to add to the Document. Structured information is better added to the User Metadata, which accepts JSON.',
required: false,
list: false,
altersDynamicFields: false,
},
{
key: 'valid_from',
label: 'Valid From',
type: 'datetime',
helpText:
'The moment from which this document is valid. When no value is given, the current date will be used.',
required: false,
list: false,
altersDynamicFields: false,
},
{
key: 'metadata_service',
label: 'Service',
type: 'string',
default: 'Zapier',
helpText: "By default we use 'Zapier' as service name. However, if you need to change that to e.g. give an indication of the Zapier flow, you can change this value.",
required: true,
},
{
key: 'metadata_source',
label: 'Source App',
type: 'string',
helpText: "The source app of the document's origin. e.g. 'Dropbox' if the document is provided through Dropbox, or 'Google Docs' if that happens to be the origin of the document.",
required: false,
},
{
key: 'metadata_unique_id',
label: 'Unique ID',
type: 'string',
helpText: 'A unique identifier provided by the source system, if available.',
required: false,
},
{
key: 'metadata_unique_url',
label: 'Unique URL',
type: 'string',
helpText: "A unique URL that is provided by the source system, if that's available",
required: false,
},
{
key: 'additional_metadata',
label: 'Additional Metadata',
helpText: "Extra metadata you'd like to add to the document",
dict: true,
required: false,
altersDynamicFields: false,
},
{
key: 'catalog_properties',
label: 'Catalog Properties',
helpText:
'Depending on the Catalog ID provided, you can add the required key-value pairs here.',
dict: true,
required: false,
altersDynamicFields: false,
},
],
perform: async (z, bundle) => {
try {
z.console.log("Starting New Log Trace for add_document");
z.console.log("=======================================");
const client = new EveAIApiClient(z, bundle);
// Prepare base metadata
const baseMetadata = {
service: bundle.inputData.metadata_service || 'Zapier',
source: bundle.inputData.metadata_source || '',
unique_id: bundle.inputData.metadata_unique_id || '',
unique_url: bundle.inputData.metadata_unique_url || '',
};
// If there's additional metadata, merge it
if (bundle.inputData.additional_metadata) {
Object.assign(baseMetadata, bundle.inputData.additional_metadata);
}
// Get the file URL from Zapier
const tempFileUrl = await z.stashFile(bundle.inputData.file);
// Log the temporary URL for debugging
z.console.log('Temporary URL created:', tempFileUrl);
// Create request data as an object
const requestData = {
catalog_id: bundle.inputData.catalog_id,
language: bundle.inputData.language,
temp_url: tempFileUrl,
user_metadata: JSON.stringify(baseMetadata),
};
// Add name property if it exists
if (bundle.inputData.name) {
requestData.name = bundle.inputData.name;
}
// Add user_context property if it exists
if (bundle.inputData.user_context) {
requestData.user_context = bundle.inputData.user_context;
}
// Add valid_from property if it exists
if (bundle.inputData.valid_from) {
requestData.valid_from = bundle.inputData.valid_from;
}
// Add catalog properties if they exist
if (bundle.inputData.catalog_properties) {
requestData.catalog_properties = JSON.stringify(bundle.inputData.catalog_properties);
}
// Make request to API
return await client.make_request('POST', '/documents/add_document_through_url', requestData);
} catch (error) {
// Enhanced error logging
z.console.error('Error details:', {
message: error.message,
response: error.response ? {
status: error.response.status,
headers: error.response.headers,
data: error.response.data
} : 'No response',
request: error.request ? {
method: error.request.method,
url: error.request.url,
headers: error.request.headers
} : 'No request'
});
throw error;
}
}
// perform: async (z, bundle) => {
// try {
// z.console.log("Starting New Log Trace for add_document")
// z.console.log("=======================================")
//
// // Prepare base metadata
// const baseMetadata = {
// service: bundle.inputData.metadata_service || 'Zapier',
// source: bundle.inputData.metadata_source,
// unique_id: bundle.inputData.metadata_unique_id,
// unique_url: bundle.inputData.metadata_unique_url,
// };
//
// // If there's additional metadata, merge it with the base metadata
// if (bundle.inputData.additional_metadata) {
// Object.assign(baseMetadata, bundle.inputData.additional_metadata);
// }
//
// const requestData = {
//
// catalog_id: bundle.inputData.catalog_id,
// language: bundle.inputData.language,
//
// // Add optional fields if they exist
// name: bundle.inputData.name || undefined,
// user_context: bundle.inputData.user_context || undefined,
// valid_from: bundle.inputData.valid_from || undefined,
// user_metadata: JSON.stringify(baseMetadata),
// catalog_properties: JSON.stringify(bundle.inputData.catalog_properties) || undefined,
// file: z.stashFile(bundle.inputData.file),
// }
//
// // Make request to your API
// const response = await z.request({
// url: 'https://evie.askeveai.com/api/api/v1/documents/add_document',
// method: 'POST',
// body: requestData,
// headers: {
// 'Authorization': `Bearer ${bundle.authData.access_token}`,
// 'Content-Type': 'multipart/form-data',
// },
// });
//
// // Log the response for debugging
// z.console.log('API Response:', {
// status: response.status,
// body: response.data
// });
// // Return the parsed response
// return response.json;
// } catch (error) {
// // Enhanced error logging
// z.console.error('Error details:', {
// message: error.message,
// response: error.response ? {
// status: error.response.status,
// headers: error.response.headers,
// data: error.response.data
// } : 'No response',
// request: error.request ? {
// method: error.request.method,
// url: error.request.url,
// headers: error.request.headers
// } : 'No request'
// });
// throw error;
// }
// }
}
};
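The same create can be exercised directly from Python; a minimal sketch of the `add_document_through_url` call, assuming a bearer token from the auth flow above and any publicly fetchable HTTPS URL in place of Zapier's stashed file:
```python
import json
import requests

BASE_URL = "https://evie.askeveai.com/api/api/v1"

def add_document(token: str, catalog_id: int, temp_url: str,
                 name: str, language: str = "en") -> dict:
    """Upload a document by URL, mirroring the Zapier create above."""
    payload = {
        "catalog_id": catalog_id,
        "language": language,
        "temp_url": temp_url,
        "name": name,
        "user_metadata": json.dumps({"service": "Python"}),
    }
    resp = requests.post(
        f"{BASE_URL}/documents/add_document_through_url",
        json=payload,
        headers={"Authorization": f"Bearer {token}"},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()
```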

View File

@@ -0,0 +1,25 @@
const authentication = require('./authentication');
const addDocument = require('./creates/add_document');
module.exports = {
// This is just shorthand to reference the installed dependencies you have.
// Zapier will need to know these before we can upload.
version: require('./package.json').version,
platformVersion: require('zapier-platform-core').version,
// Register the authentication
authentication: authentication,
// If you want your trigger to show up, you better include it here!
triggers: {},
// If you want your searches to show up, you better include it here!
searches: {},
// If you want your creates to show up, you better include it here!
creates: {
[addDocument.key]: addDocument
},
resources: {},
};

File diff suppressed because it is too large

View File

@@ -0,0 +1,16 @@
{
"name": "eveai_integration",
"version": "1.0.3",
"description": "",
"main": "index.js",
"scripts": {
"test": "jest --testTimeout 10000"
},
"dependencies": {
"zapier-platform-core": "15.19.0"
},
"devDependencies": {
"jest": "^29.6.0"
},
"private": true
}

View File

@@ -76,6 +76,7 @@ http {
location /chat/ {
proxy_pass http://eveai_chat:5002/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -84,6 +85,12 @@ http {
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_buffering off;
# Add CORS headers
add_header 'Access-Control-Allow-Origin' '*' always;
add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS' always;
add_header 'Access-Control-Allow-Headers' 'DNT,X-CustomHeader,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Authorization' always;
add_header 'Access-Control-Allow-Credentials' 'true' always;
}
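A quick way to eyeball the new CORS headers from Python (the origin below is a placeholder):
```python
import requests

resp = requests.options(
    "https://evie.askeveai.com/chat/socket.io/",
    headers={
        "Origin": "https://example.com",
        "Access-Control-Request-Method": "GET",
    },
    timeout=10,
)
print(resp.headers.get("Access-Control-Allow-Origin"))      # expect '*'
print(resp.headers.get("Access-Control-Allow-Credentials")) # expect 'true'
```
Note that browsers will not honour `Access-Control-Allow-Credentials: true` together with a wildcard `Access-Control-Allow-Origin`, so this combination may need revisiting if credentialed requests are expected.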
location /admin/ {

View File

@@ -31,13 +31,13 @@ langchain-anthropic~=0.2.0
langchain-community~=0.3.0
langchain-core~=0.3.0
langchain-mistralai~=0.2.0
langchain-openai~=0.2.0
langchain-openai~=0.2.10
langchain-postgres~=0.0.12
langchain-text-splitters~=0.3.0
langcodes~=3.4.0
langdetect~=1.0.9
langsmith~=0.1.81
openai~=1.45.1
openai~=1.55.3
pg8000~=1.31.2
pgvector~=0.2.5
pycryptodome~=3.20.0
@@ -88,4 +88,5 @@ typing_extensions~=4.12.2
prometheus_flask_exporter~=0.23.1
prometheus_client~=0.20.0
babel~=2.16.0
dogpile.cache~=1.3.3
dogpile.cache~=1.3.3
python-docx~=1.1.2