- License Usage Calculation realised

- View License Usages
- Celery Beat container added
- First schedule in Celery Beat for calculating usage (hourly)
- repopack can now split for different components
- Various fixes as a consequence of changing file_location / file_name ==> bucket_name / object_name
- Celery Routing / Queuing updated
This commit is contained in:
Josako
2024-10-11 16:33:36 +02:00
parent 5ffad160b1
commit 9f5f090f0c
57 changed files with 935 additions and 174 deletions

View File

@@ -5,13 +5,14 @@ from datetime import datetime as dt, timezone as tz, datetime
from celery import states
from dateutil.relativedelta import relativedelta
from flask import current_app
from sqlalchemy import or_, and_
from sqlalchemy import or_, and_, text
from sqlalchemy.exc import SQLAlchemyError
from common.extensions import db
from common.models.user import Tenant
from common.models.entitlements import BusinessEventLog, LicenseUsage, License
from common.utils.celery_utils import current_celery
from common.utils.eveai_exceptions import EveAINoLicenseForTenant
from common.utils.eveai_exceptions import EveAINoLicenseForTenant, EveAIException
from common.utils.database import Database
# Healthcheck task
@@ -24,32 +25,54 @@ def ping():
def update_usages():
current_timestamp = dt.now(tz.utc)
tenant_ids = get_all_tenant_ids()
# List to collect all errors
error_list = []
for tenant_id in tenant_ids:
tenant = Tenant.query.get(tenant_id)
if tenant.storage_dirty:
recalculate_storage_for_tenant(tenant)
check_and_create_license_usage_for_tenant(tenant_id)
logs = get_logs_for_processing(tenant_id, current_timestamp)
if not logs:
continue # If no logs to be processed, continu to the next tenant
try:
Database(tenant_id).switch_schema()
check_and_create_license_usage_for_tenant(tenant_id)
tenant = Tenant.query.get(tenant_id)
if tenant.storage_dirty:
recalculate_storage_for_tenant(tenant)
logs = get_logs_for_processing(tenant_id, current_timestamp)
if not logs:
continue # If no logs to be processed, continu to the next tenant
# Get the min and max timestamp from the logs
min_timestamp = min(log.timestamp for log in logs)
max_timestamp = max(log.timestamp for log in logs)
# Get the min and max timestamp from the logs
min_timestamp = min(log.timestamp for log in logs)
max_timestamp = max(log.timestamp for log in logs)
# Retrieve relevant LicenseUsage records
license_usages = get_relevant_license_usages(db.session, tenant_id, min_timestamp, max_timestamp)
# Retrieve relevant LicenseUsage records
current_app.logger.debug(f"Searching relevant usages for tenant {tenant_id}")
license_usages = get_relevant_license_usages(db.session, tenant_id, min_timestamp, max_timestamp)
current_app.logger.debug(f"Found {license_usages}, end searching relevant usages for tenant {tenant_id}")
# Split logs based on LicenseUsage periods
logs_by_usage = split_logs_by_license_usage(logs, license_usages)
# Split logs based on LicenseUsage periods
current_app.logger.debug(f"Splitting usages for tenant {tenant_id}")
logs_by_usage = split_logs_by_license_usage(logs, license_usages)
current_app.logger.debug(f"Found {logs_by_usage}, end splitting logs for tenant {tenant_id}")
# Now you can process logs for each LicenseUsage
for license_usage_id, logs in logs_by_usage.items():
process_logs_for_license_usage(tenant_id, license_usage_id, logs)
# Now you can process logs for each LicenseUsage
for license_usage_id, logs in logs_by_usage.items():
current_app.logger.debug(f"Processing logs for usage id {license_usage_id} for tenant {tenant_id}")
process_logs_for_license_usage(tenant_id, license_usage_id, logs)
current_app.logger.debug(f"Finished processing logs for tenant {tenant_id}")
except Exception as e:
error = f"Usage Calculation error for Tenant {tenant_id}: {e}"
error_list.append(error)
current_app.logger.error(error)
continue
if error_list:
raise Exception('\n'.join(error_list))
return "Update Usages taks completed successfully"
def get_all_tenant_ids():
tenant_ids = db.session.query(Tenant.tenant_id).all()
tenant_ids = db.session.query(Tenant.id).all()
return [tenant_id[0] for tenant_id in tenant_ids] # Extract tenant_id from tuples
@@ -57,21 +80,21 @@ def check_and_create_license_usage_for_tenant(tenant_id):
current_date = dt.now(tz.utc).date()
license_usages = (db.session.query(LicenseUsage)
.filter_by(tenant_id=tenant_id)
.filter_by(and_(LicenseUsage.period_start_date <= current_date,
LicenseUsage.period_end_date >= current_date))
.filter(and_(LicenseUsage.period_start_date <= current_date,
LicenseUsage.period_end_date >= current_date))
.all())
if not license_usages:
active_license = (db.session.query(License).filter_by(tenant_id=tenant_id)
.filter_by(and_(License.start_date <= current_date,
License.end_date >= current_date))
.one())
.filter(and_(License.start_date <= current_date,
License.end_date >= current_date))
.one_or_none())
if not active_license:
current_app.logger.error(f"No License defined for {tenant_id}. "
f"Impossible to calculate license usage.")
raise EveAINoLicenseForTenant(message=f"No License defined for {tenant_id}. "
f"Impossible to calculate license usage.")
start_date, end_date = calculate_valid_period(current_date, active_license.period_start_date)
start_date, end_date = calculate_valid_period(current_date, active_license.start_date)
new_license_usage = LicenseUsage(period_start_date=start_date,
period_end_date=end_date,
license_id=active_license.id,
@@ -124,8 +147,8 @@ def get_relevant_license_usages(session, tenant_id, min_timestamp, max_timestamp
# Fetch LicenseUsage records where the log timestamps fall between period_start_date and period_end_date
return session.query(LicenseUsage).filter(
LicenseUsage.tenant_id == tenant_id,
LicenseUsage.period_start_date <= max_timestamp,
LicenseUsage.period_end_date >= min_timestamp
LicenseUsage.period_start_date <= max_timestamp.date(),
LicenseUsage.period_end_date >= min_timestamp.date()
).order_by(LicenseUsage.period_start_date).all()
@@ -136,7 +159,7 @@ def split_logs_by_license_usage(logs, license_usages):
for log in logs:
# Find the corresponding LicenseUsage for each log based on the timestamp
for license_usage in license_usages:
if license_usage.period_start_date <= log.timestamp <= license_usage.period_end_date:
if license_usage.period_start_date <= log.timestamp.date() <= license_usage.period_end_date:
logs_by_usage[license_usage.id].append(log)
break
@@ -181,7 +204,7 @@ def process_logs_for_license_usage(tenant_id, license_usage_id, logs):
log.license_usage_id = license_usage_id
# Update the LicenseUsage record with the accumulated values
license_usage.embedding_mb += embedding_mb_used
license_usage.embedding_mb_used += embedding_mb_used
license_usage.embedding_prompt_tokens_used += embedding_prompt_tokens_used
license_usage.embedding_completion_tokens_used += embedding_completion_tokens_used
license_usage.embedding_total_tokens_used += embedding_total_tokens_used
@@ -189,27 +212,31 @@ def process_logs_for_license_usage(tenant_id, license_usage_id, logs):
license_usage.interaction_completion_tokens_used += interaction_completion_tokens_used
license_usage.interaction_total_tokens_used += interaction_total_tokens_used
current_app.logger.debug(f"Processed logs for license usage {license_usage.id}:\n{license_usage}")
# Commit the updates to the LicenseUsage and log records
try:
db.session.add(license_usage)
db.session.add(logs)
for log in logs:
db.session.add(log)
db.session.commit()
except SQLAlchemyError as e:
db.session.rollback()
current_app.logger.error(f"Error trying to update license usage and logs for tenant {tenant_id}. ")
current_app.logger.error(f"Error trying to update license usage and logs for tenant {tenant_id}: {e}")
raise e
def recalculate_storage_for_tenant(tenant):
# Perform a SUM operation to get the total file size from document_versions
total_storage = db.session.execute(f"""
total_storage = db.session.execute(text(f"""
SELECT SUM(file_size)
FROM {tenant.id}.document_versions
""").scalar()
FROM document_version
""")).scalar()
current_app.logger.debug(f"Recalculating storage for tenant {tenant} - Total storage: {total_storage}")
# Update the LicenseUsage with the recalculated storage
license_usage = db.session.query(LicenseUsage).filter_by(tenant_id=tenant.id).first()
license_usage.storage_mb = total_storage / (1024 * 1024) # Convert bytes to MB
license_usage.storage_mb_used = total_storage
# Reset the dirty flag after recalculating
tenant.storage_dirty = False