106 lines
4.4 KiB
Python
106 lines
4.4 KiB
Python
from common.extensions import db
|
|
from .user import User, Tenant
|
|
from pgvector.sqlalchemy import Vector
|
|
|
|
|
|
class Document(db.Model):
    """Document record owned by a tenant.

    Each document is linked to exactly one ``Tenant`` (non-nullable foreign
    key) and exposes its ``DocumentLanguage`` rows through ``languages``.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False)
    tenant_id = db.Column(
        db.Integer,
        db.ForeignKey(Tenant.id),
        nullable=False,
    )
    # Both timestamps are optional (nullable).
    valid_from = db.Column(db.DateTime, nullable=True)
    valid_to = db.Column(db.DateTime, nullable=True)

    # Versioning Information
    created_at = db.Column(
        db.DateTime,
        server_default=db.func.now(),
        nullable=False,
    )
    created_by = db.Column(
        db.Integer,
        db.ForeignKey(User.id),
        nullable=False,
    )
    # Defaults to the database clock and is refreshed by SQLAlchemy on UPDATE.
    updated_at = db.Column(
        db.DateTime,
        server_default=db.func.now(),
        onupdate=db.func.now(),
        nullable=False,
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations
    # The backref adds a ``document`` attribute on DocumentLanguage.
    languages = db.relationship(
        'DocumentLanguage',
        backref='document',
        lazy=True,
    )

    def __repr__(self):
        """Return a short debug representation: id and name."""
        doc_id, doc_name = self.id, self.name
        return f"<Document {doc_id}: {doc_name}>"
|
|
|
|
|
|
class DocumentLanguage(db.Model):
    """Language-specific variant of a ``Document``.

    Holds the two-character language code, the list of ``DocumentVersion``
    rows for that language (``versions``) and a direct pointer to one of
    them (``latest_version``).
    """

    id = db.Column(db.Integer, primary_key=True)
    document_id = db.Column(db.Integer, db.ForeignKey(Document.id), nullable=False)
    language = db.Column(db.String(2), nullable=False)
    # Points back at document_version, which itself points at this table via
    # DocumentVersion.doc_lang_id: the two tables form a foreign-key cycle.
    latest_version_id = db.Column(db.Integer, db.ForeignKey('document_version.id'), nullable=True)

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations
    # foreign_keys is spelled out on both relationships because there are two
    # foreign-key paths between document_language and document_version.
    versions = db.relationship(
        'DocumentVersion',
        backref='document_language',
        lazy='joined',
        foreign_keys='DocumentVersion.doc_lang_id'
    )
    latest_version = db.relationship(
        'DocumentVersion',
        uselist=False,
        foreign_keys=[latest_version_id],
        # Mutually dependent rows: a new DocumentLanguage and a new
        # DocumentVersion that reference each other cannot be ordered for
        # INSERT. post_update makes SQLAlchemy insert both rows first and set
        # latest_version_id with a separate UPDATE instead of raising
        # CircularDependencyError at flush time.
        post_update=True
    )

    def __repr__(self):
        """Return a short debug representation: document id and language."""
        return f"<DocumentLanguage {self.document_id}.{self.language}>"
|
|
|
|
|
|
class DocumentVersion(db.Model):
    """One version of a document in a given language.

    Belongs to a ``DocumentLanguage`` (reachable as ``document_language`` via
    the backref declared on that model) and exposes its ``EmbeddingMistral``
    rows through ``embeddings``.
    """

    id = db.Column(db.Integer, primary_key=True)
    doc_lang_id = db.Column(db.Integer, db.ForeignKey(DocumentLanguage.id), nullable=False)
    url = db.Column(db.String(200), nullable=True)
    file_location = db.Column(db.String(255), nullable=True)
    file_name = db.Column(db.String(200), nullable=True)
    file_type = db.Column(db.String(20), nullable=True)

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    # NOTE(review): nullable here, unlike Document/DocumentLanguage.created_by
    # -- confirm whether that difference is intentional.
    created_by = db.Column(db.Integer, db.ForeignKey(User.id))
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Processing Information
    processing = db.Column(db.Boolean, nullable=False, default=False)
    processing_started_at = db.Column(db.DateTime, nullable=True)
    processing_finished_at = db.Column(db.DateTime, nullable=True)
    processing_error = db.Column(db.String(255), nullable=True)

    # Relations
    embeddings = db.relationship('EmbeddingMistral', backref='document_version', lazy=True)

    def __repr__(self):
        """Return ``<DocumentVersion {document_id}.{language}.{id}>``.

        Reads ``self.document_language``, so the owning DocumentLanguage must
        be reachable from this instance.
        """
        doc_lang = self.document_language
        # The previous format string had a stray '>' before '.{self.id}',
        # which yielded '<DocumentVersion 1.en>.3>'.
        return f"<DocumentVersion {doc_lang.document_id}.{doc_lang.language}.{self.id}>"

    def calc_file_location(self):
        """Return the path ``{tenant_id}/{document_id}/{language}`` for this version."""
        return f"{self.document_language.document.tenant_id}/{self.document_language.document.id}/{self.document_language.language}"

    def calc_file_name(self):
        """Return the file name ``{id}.{file_type}`` for this version."""
        return f"{self.id}.{self.file_type}"
|
|
|
|
|
|
class EmbeddingMistral(db.Model):
    """Embedding vector (1024 dimensions) stored for a ``DocumentVersion``."""

    id = db.Column(db.Integer, primary_key=True)
    doc_vers_id = db.Column(
        db.Integer,
        db.ForeignKey(DocumentVersion.id),
        nullable=False,
    )
    # Boolean flag; new rows default to True.
    active = db.Column(db.Boolean, default=True, nullable=False)

    # 1024 is the MISTRAL Embedding dimension.
    # If another embedding model is chosen, this dimension may need to be changed.
    embedding = db.Column(
        Vector(1024),
        nullable=False,
    )
|
|
|
|
|
|
class EmbeddingSmallOpenAI(db.Model):
    """Embedding vector (1536 dimensions) stored for a ``DocumentVersion``."""

    id = db.Column(db.Integer, primary_key=True)
    doc_vers_id = db.Column(
        db.Integer,
        db.ForeignKey(DocumentVersion.id),
        nullable=False,
    )
    # Boolean flag; new rows default to True.
    active = db.Column(db.Boolean, default=True, nullable=False)

    # 1536 is the OpenAI Small Embedding dimension.
    # If another embedding model is chosen, this dimension may need to be changed.
    embedding = db.Column(
        Vector(1536),
        nullable=False,
    )
|