Refactoring part 1
Some changes for workers, but stopped due to refactoring
This commit is contained in:
105
common/models/document.py
Normal file
105
common/models/document.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from common.extensions import db
|
||||
from .user import User, Tenant
|
||||
from pgvector.sqlalchemy import Vector
|
||||
|
||||
|
||||
class Document(db.Model):
    """Document record belonging to a tenant.

    A document is the parent of its per-language entries, reachable through
    ``languages``; each ``DocumentLanguage`` gets a ``document`` backref.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False)
    tenant_id = db.Column(
        db.Integer,
        db.ForeignKey(Tenant.id),
        nullable=False,
    )
    # Both bounds are optional (nullable).
    valid_from = db.Column(db.DateTime, nullable=True)
    valid_to = db.Column(db.DateTime, nullable=True)

    # Versioning Information
    # Timestamps are filled in by the database (server-side now()).
    created_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
    )
    created_by = db.Column(
        db.Integer,
        db.ForeignKey(User.id),
        nullable=False,
    )
    updated_at = db.Column(
        db.DateTime,
        nullable=False,
        server_default=db.func.now(),
        onupdate=db.func.now(),
    )
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations
    languages = db.relationship(
        'DocumentLanguage',
        backref='document',
        lazy=True,
    )

    def __repr__(self):
        """Return a short debug representation: id and name."""
        return f"<Document {self.id}: {self.name}>"
|
||||
|
||||
|
||||
class DocumentLanguage(db.Model):
    """Language-specific entry of a ``Document``.

    Holds all ``DocumentVersion`` rows for one language (``versions``) and an
    optional pointer to one of them (``latest_version``).
    """

    id = db.Column(db.Integer, primary_key=True)
    document_id = db.Column(db.Integer, db.ForeignKey(Document.id), nullable=False)
    language = db.Column(db.String(2), nullable=False)
    # Referenced by table name because DocumentVersion is declared further
    # down in this module.
    latest_version_id = db.Column(db.Integer, db.ForeignKey('document_version.id'), nullable=True)

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Relations
    # Two foreign-key paths connect this table and document_version, so each
    # relationship names the column it follows.
    versions = db.relationship(
        'DocumentVersion',
        backref='document_language',
        lazy='joined',
        foreign_keys='DocumentVersion.doc_lang_id'
    )
    latest_version = db.relationship(
        'DocumentVersion',
        uselist=False,
        foreign_keys=[latest_version_id],
        # document_language and document_version reference each other.
        # post_update makes SQLAlchemy set latest_version_id with a separate
        # UPDATE after both rows exist, so a flush that creates a version and
        # marks it as latest does not fail with CircularDependencyError.
        post_update=True,
    )

    def __repr__(self):
        """Return a short debug representation: document id and language."""
        return f"<DocumentLanguage {self.document_id}.{self.language}>"
|
||||
|
||||
|
||||
class DocumentVersion(db.Model):
    """One version of a document in a given language.

    Stores where the version's content lives (``url`` and/or file fields) and
    the state of its processing. ``document_language`` is provided as a
    backref by ``DocumentLanguage.versions``.
    """

    id = db.Column(db.Integer, primary_key=True)
    doc_lang_id = db.Column(db.Integer, db.ForeignKey(DocumentLanguage.id), nullable=False)
    url = db.Column(db.String(200), nullable=True)
    file_location = db.Column(db.String(255), nullable=True)
    file_name = db.Column(db.String(200), nullable=True)
    file_type = db.Column(db.String(20), nullable=True)

    # Versioning Information
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now())
    created_by = db.Column(db.Integer, db.ForeignKey(User.id))
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
    updated_by = db.Column(db.Integer, db.ForeignKey(User.id))

    # Processing Information
    processing = db.Column(db.Boolean, nullable=False, default=False)
    processing_started_at = db.Column(db.DateTime, nullable=True)
    processing_finished_at = db.Column(db.DateTime, nullable=True)
    processing_error = db.Column(db.String(255), nullable=True)

    # Relations
    embeddings = db.relationship('EmbeddingMistral', backref='document_version', lazy=True)

    def __repr__(self):
        """Return ``<DocumentVersion {document_id}.{language}.{id}>``.

        Falls back to ``?`` placeholders while the version is not yet
        attached to a ``DocumentLanguage``, so that repr() never raises.
        """
        doc_lang = self.document_language
        if doc_lang is None:
            return f"<DocumentVersion ?.?.{self.id}>"
        return f"<DocumentVersion {doc_lang.document_id}.{doc_lang.language}.{self.id}>"

    def calc_file_location(self):
        """Return the storage path ``{tenant_id}/{document_id}/{language}``.

        Requires the version to be attached to a ``DocumentLanguage`` that is
        itself attached to a ``Document``.
        """
        doc_lang = self.document_language
        document = doc_lang.document
        return f"{document.tenant_id}/{document.id}/{doc_lang.language}"

    def calc_file_name(self):
        """Return the file name ``{id}.{file_type}`` for this version."""
        return f"{self.id}.{self.file_type}"
|
||||
|
||||
|
||||
class EmbeddingMistral(db.Model):
    """Embedding vector (1024 dimensions) stored for a ``DocumentVersion``."""

    id = db.Column(db.Integer, primary_key=True)
    doc_vers_id = db.Column(
        db.Integer,
        db.ForeignKey(DocumentVersion.id),
        nullable=False,
    )
    active = db.Column(db.Boolean, nullable=False, default=True)

    # 1024 is the MISTRAL embedding dimension; it may need to change if
    # another embedding model is chosen.
    embedding = db.Column(Vector(1024), nullable=False)
|
||||
|
||||
|
||||
class EmbeddingSmallOpenAI(db.Model):
    """Embedding vector (1536 dimensions) stored for a ``DocumentVersion``."""

    id = db.Column(db.Integer, primary_key=True)
    doc_vers_id = db.Column(
        db.Integer,
        db.ForeignKey(DocumentVersion.id),
        nullable=False,
    )
    active = db.Column(db.Boolean, nullable=False, default=True)

    # 1536 is the OpenAI Small embedding dimension; it may need to change if
    # another embedding model is chosen.
    embedding = db.Column(Vector(1536), nullable=False)
|
||||
Reference in New Issue
Block a user