Files
eveAI/common/models/document.py
Josako 25213f2004 - Implementation of specialist execution api, including SSE protocol
- eveai_chat becomes deprecated and should be replaced with SSE
- Adaptation of STANDARD_RAG specialist
- Base class definition allowing to realise specialists with crewai framework
- Implementation of SPIN_SPECIALIST
- Implementation of test app for testing specialists (test_specialist_client). Also serves as an example for future SSE-based client
- Improvements to startup scripts to better handle and scale multiple connections
- Small improvements to the interaction forms and views
- Caching implementation improved and augmented with additional caches
2025-02-20 05:50:16 +01:00

182 lines
7.5 KiB
Python

from common.extensions import db
from .user import User, Tenant
from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.postgresql import ARRAY
import sqlalchemy as sa
class Catalog(db.Model):
    """ORM model describing a catalog.

    Stores a name, description and type, chunk-size bounds, JSONB
    metadata/configuration blobs and the usual audit columns.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50), nullable=False)
    description = db.Column(db.Text, nullable=True)
    type = db.Column(
        db.String(50),
        nullable=False,
        default="STANDARD_CATALOG",
    )

    # Chunk size bounds (units are not visible here -- presumably
    # characters; TODO confirm against the chunking code).
    min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
    max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)

    # Meta Data: free-form JSONB payloads, all optional.
    user_metadata = db.Column(JSONB, nullable=True)
    system_metadata = db.Column(JSONB, nullable=True)
    configuration = db.Column(JSONB, nullable=True)

    # Versioning Information: timestamps are filled in by the database
    # (server_default / onupdate), the *_by columns reference User.id.
    created_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
    )
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
        onupdate=db.func.now(),
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Processor(db.Model):
    """ORM model describing a processor that may be attached to a catalog.

    A processor has a mandatory ``type``, an optional ``sub_file_type``,
    a ``tuning`` flag, JSONB metadata/configuration and audit columns.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50), nullable=False)
    description = db.Column(db.Text, nullable=True)
    # Optional link to the owning catalog (foreign key on catalog.id).
    catalog_id = db.Column(
        db.Integer,
        db.ForeignKey('catalog.id'),
        nullable=True,
    )
    type = db.Column(db.String(50), nullable=False)
    sub_file_type = db.Column(db.String(50), nullable=True)

    # Tuning enablers
    tuning = db.Column(db.Boolean, nullable=True, default=False)

    # Meta Data: free-form JSONB payloads, all optional.
    user_metadata = db.Column(JSONB, nullable=True)
    system_metadata = db.Column(JSONB, nullable=True)
    configuration = db.Column(JSONB, nullable=True)

    # Versioning Information: timestamps are filled in by the database
    # (server_default / onupdate), the *_by columns reference User.id.
    created_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
    )
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
        onupdate=db.func.now(),
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Retriever(db.Model):
    """ORM model describing a retriever that may be attached to a catalog.

    Besides name/description it stores the retriever ``type`` and
    ``type_version``, a ``tuning`` flag, JSONB metadata, configuration and
    arguments, plus audit columns.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(50), nullable=False)
    description = db.Column(db.Text, nullable=True)
    # Optional link to the owning catalog (foreign key on catalog.id).
    catalog_id = db.Column(
        db.Integer,
        db.ForeignKey('catalog.id'),
        nullable=True,
    )
    type = db.Column(
        db.String(50),
        nullable=False,
        default="STANDARD_RAG",
    )
    # NOTE(review): this default repeats the ``type`` default rather than
    # looking like a version string -- confirm that this is intended.
    type_version = db.Column(
        db.String(20),
        nullable=True,
        default="STANDARD_RAG",
    )
    tuning = db.Column(db.Boolean, nullable=True, default=False)

    # Meta Data: free-form JSONB payloads, all optional.
    user_metadata = db.Column(JSONB, nullable=True)
    system_metadata = db.Column(JSONB, nullable=True)
    configuration = db.Column(JSONB, nullable=True)
    arguments = db.Column(JSONB, nullable=True)

    # Versioning Information: timestamps are filled in by the database
    # (server_default / onupdate), the *_by columns reference User.id.
    created_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
    )
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
        onupdate=db.func.now(),
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
class Document(db.Model):
    """ORM model for a document that may belong to a catalog.

    A document has a name and an optional validity window
    (``valid_from`` / ``valid_to``) and is linked to its
    ``DocumentVersion`` rows through ``versions``.
    """

    id = db.Column(db.Integer, primary_key=True)
    # NOTE(review): the tenant link below is commented out in the original;
    # confirm whether it can be removed for good.
    # tenant_id = db.Column(db.Integer, db.ForeignKey(Tenant.id), nullable=False)
    catalog_id = db.Column(
        db.Integer,
        db.ForeignKey(Catalog.id),
        nullable=True,
    )
    name = db.Column(db.String(100), nullable=False)
    valid_from = db.Column(db.DateTime, nullable=True)
    valid_to = db.Column(db.DateTime, nullable=True)

    # Versioning Information: timestamps are filled in by the database
    # (server_default / onupdate), the *_by columns reference User.id.
    created_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
    )
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)
    updated_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
        onupdate=db.func.now(),
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations: the backref also exposes ``DocumentVersion.document``.
    versions = db.relationship(
        'DocumentVersion',
        backref='document',
        lazy=True,
    )

    def __repr__(self):
        """Return a short debug representation with id and name."""
        return f"<Document {self.id}: {self.name}>"
class DocumentVersion(db.Model):
    """ORM model for one version of a ``Document``.

    Holds where the version's file can be found (``url``, ``bucket_name``,
    ``object_name``), its file type/size and language, context and metadata
    fields, audit columns and processing state, and links to the
    ``Embedding`` rows created for it.
    """

    id = db.Column(db.Integer, primary_key=True)
    # Mandatory link to the parent document; the ``document`` attribute is
    # provided by the backref declared on ``Document.versions``.
    doc_id = db.Column(db.Integer, db.ForeignKey(Document.id), nullable=False)
    url = db.Column(db.String(200), nullable=True)
    bucket_name = db.Column(db.String(255), nullable=True)
    object_name = db.Column(db.String(200), nullable=True)
    file_type = db.Column(db.String(20), nullable=True)
    sub_file_type = db.Column(db.String(50), nullable=True)
    file_size = db.Column(db.Float, nullable=True)
    # Two-character language code (presumably ISO 639-1 -- TODO confirm).
    language = db.Column(db.String(2), nullable=False)
    user_context = db.Column(db.Text, nullable=True)
    system_context = db.Column(db.Text, nullable=True)
    user_metadata = db.Column(JSONB, nullable=True)
    system_metadata = db.Column(JSONB, nullable=True)
    catalog_properties = db.Column(JSONB, nullable=True)

    # Versioning Information: timestamps are filled in by the database
    # (server_default / onupdate), the *_by columns reference User.id.
    created_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
    )
    created_by = db.Column(db.Integer, db.ForeignKey(User.id))
    updated_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
        onupdate=db.func.now(),
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Processing Information
    processing = db.Column(db.Boolean, nullable=False, default=False)
    processing_started_at = db.Column(db.DateTime, nullable=True)
    processing_finished_at = db.Column(db.DateTime, nullable=True)
    processing_error = db.Column(db.String(255), nullable=True)

    # Relations: the backref also exposes ``Embedding.document_version``.
    embeddings = db.relationship(
        'Embedding',
        backref='document_version',
        lazy=True,
    )

    def __repr__(self):
        """Return ``<DocumentVersion {doc_id}.{language}.{id}>``.

        Uses only columns defined on this model. The previous version read
        ``self.document_language``, which this model does not define, so
        calling ``repr()`` raised ``AttributeError``.
        """
        return f"<DocumentVersion {self.doc_id}.{self.language}.{self.id}>"
class Embedding(db.Model):
    """Base ORM model for an embedded chunk of a document version.

    Rows live in the ``embeddings`` table; the ``type`` column is the
    polymorphic discriminator that selects the concrete subclass.
    """

    __tablename__ = 'embeddings'

    id = db.Column(db.Integer, primary_key=True)
    type = db.Column(db.String(30), nullable=False)
    doc_vers_id = db.Column(
        db.Integer,
        db.ForeignKey(DocumentVersion.id),
        nullable=False,
    )
    active = db.Column(db.Boolean, nullable=False, default=True)
    chunk = db.Column(db.Text, nullable=False)

    # ``type`` below is the column defined above in this class body,
    # not the Python builtin.
    __mapper_args__ = {
        'polymorphic_identity': 'embedding',
        'polymorphic_on': type,
    }
class EmbeddingMistral(Embedding):
    """Embedding subclass storing a 1024-dimensional vector.

    Uses its own table whose primary key is a foreign key to
    ``embeddings.id`` (joined-table inheritance).
    """

    __tablename__ = 'embedding_mistral'

    id = db.Column(
        db.Integer,
        db.ForeignKey('embeddings.id'),
        primary_key=True,
    )

    # 1024 is the MISTRAL Embedding dimension.
    # Switching to another embedding model may require a different size.
    embedding = db.Column(Vector(1024), nullable=False)

    __mapper_args__ = {
        'polymorphic_identity': 'embedding_mistral',
    }
class EmbeddingSmallOpenAI(Embedding):
    """Embedding subclass storing a 1536-dimensional vector.

    Uses its own table whose primary key is a foreign key to
    ``embeddings.id`` (joined-table inheritance).
    """

    __tablename__ = 'embedding_small_openai'

    id = db.Column(
        db.Integer,
        db.ForeignKey('embeddings.id'),
        primary_key=True,
    )

    # 1536 is the OpenAI Small Embedding dimension.
    # Switching to another embedding model may require a different size.
    embedding = db.Column(Vector(1536), nullable=False)

    __mapper_args__ = {
        'polymorphic_identity': 'embedding_small_openai',
    }
class EmbeddingLargeOpenAI(Embedding):
    """Embedding subclass storing a 3072-dimensional vector.

    Uses its own table whose primary key is a foreign key to
    ``embeddings.id`` (joined-table inheritance).
    """

    __tablename__ = 'embedding_large_openai'

    id = db.Column(
        db.Integer,
        db.ForeignKey('embeddings.id'),
        primary_key=True,
    )

    # 3072 is the OpenAI Large Embedding dimension.
    # Switching to another embedding model may require a different size.
    embedding = db.Column(Vector(3072), nullable=False)

    __mapper_args__ = {
        'polymorphic_identity': 'embedding_large_openai',
    }