- Revisiting RAG_SPECIALIST

- Adapt Catalogs & Retrievers to use specific types, removing tagging_fields
- Adding CrewAI Implementation Guide
This commit is contained in:
Josako
2025-07-08 15:54:16 +02:00
parent 33b5742d2f
commit 509ee95d81
32 changed files with 997 additions and 825 deletions

View File

@@ -11,6 +11,7 @@ class Catalog(db.Model):
name = db.Column(db.String(50), nullable=False, unique=True)
description = db.Column(db.Text, nullable=True)
type = db.Column(db.String(50), nullable=False, default="STANDARD_CATALOG")
type_version = db.Column(db.String(20), nullable=True, default="1.0.0")
min_chunk_size = db.Column(db.Integer, nullable=True, default=1500)
max_chunk_size = db.Column(db.Integer, nullable=True, default=2500)
@@ -26,6 +27,20 @@ class Catalog(db.Model):
updated_at = db.Column(db.DateTime, nullable=False, server_default=db.func.now(), onupdate=db.func.now())
updated_by = db.Column(db.Integer, db.ForeignKey(User.id))
def to_dict(self):
    """Return the catalog's exported attributes as a plain dictionary.

    The dictionary keys are identical to the attribute names read from the
    instance, in the order listed below.
    """
    exported_fields = (
        'id',
        'name',
        'description',
        'type',
        'type_version',
        'min_chunk_size',
        'max_chunk_size',
        'user_metadata',
        'system_metadata',
        'configuration',
    )
    return {field: getattr(self, field) for field in exported_fields}
class Processor(db.Model):
id = db.Column(db.Integer, primary_key=True)

View File

@@ -47,101 +47,101 @@ class TenantServices:
current_app.logger.error(f"Error associating tenant {tenant_id} with partner: {str(e)}")
raise e
@staticmethod
def get_available_types_for_tenant(tenant_id: int, config_type: str) -> Dict[str, Dict[str, str]]:
    """
    Get available configuration types for a tenant based on partner relationships

    Args:
        tenant_id: The tenant ID
        config_type: The configuration type. Supported values are 'specialists', 'agents',
            'tasks', 'tools', 'catalogs' and 'retrievers'.

    Returns:
        Dictionary of available types for the tenant: every type whose 'partner' entry is
        missing or None, plus every type whose 'partner' value is among the values returned by
        get_tenant_partner_specialist_denominators for this tenant.

    Raises:
        ValueError: If config_type is not one of the supported values.
    """
    # Explicit whitelist: supported config type -> cache_manager attribute holding its types cache.
    # Only the attribute for the requested config type is ever accessed.
    cache_attr_by_config_type = {
        'specialists': 'specialists_types_cache',
        'agents': 'agents_types_cache',
        'tasks': 'tasks_types_cache',
        'tools': 'tools_types_cache',
        'catalogs': 'catalogs_types_cache',
        'retrievers': 'retrievers_types_cache',
    }
    cache_attr = cache_attr_by_config_type.get(config_type)
    if cache_attr is None:
        raise ValueError(f"Unsupported config type: {config_type}")
    cache_handler = getattr(cache_manager, cache_attr)

    # Get all types with their metadata (including partner info)
    all_types = cache_handler.get_types()

    # Filter to include:
    # 1. Types with no partner (global)
    # 2. Types with partners that have a SPECIALIST_SERVICE relationship with this tenant
    available_partners = TenantServices.get_tenant_partner_specialist_denominators(tenant_id)

    available_types = {
        type_id: info for type_id, info in all_types.items()
        if info.get('partner') is None or info.get('partner') in available_partners
    }

    return available_types
@staticmethod
def get_tenant_partner_specialist_denominators(tenant_id: int) -> List[str]:
    """
    Get the specialist denominators of the SPECIALIST_SERVICE partner services this tenant has a
    relationship with, that can be used for filtering configurations.

    Args:
        tenant_id: The tenant ID

    Returns:
        List with one entry per matching PartnerTenant relationship: the value stored under
        "specialist_denominator" in the partner service's configuration, or "" when that key
        (or the configuration itself) is absent. If a database error occurs it is logged and
        the entries collected so far are returned.
    """
    partner_service_denominators = []

    try:
        # Get all partner services of type SPECIALIST_SERVICE
        specialist_services = (
            PartnerService.query
            .filter_by(type='SPECIALIST_SERVICE')
            .all()
        )

        if not specialist_services:
            return []

        # Index the already-loaded services by id so each relationship can be resolved
        # without issuing another query per PartnerTenant row.
        services_by_id = {svc.id: svc for svc in specialist_services}

        # Find tenant relationships with these services
        partner_tenants = (
            PartnerTenant.query
            .filter_by(tenant_id=tenant_id)
            .filter(PartnerTenant.partner_service_id.in_([svc.id for svc in specialist_services]))
            .all()
        )

        # Collect the denominator configured on each related partner service
        for pt in partner_tenants:
            partner_service = services_by_id.get(pt.partner_service_id)
            if partner_service:
                # Guard against a service without configuration.
                # NOTE(review): a missing denominator still yields "" as before — confirm
                # whether empty entries should be dropped instead.
                configuration = partner_service.configuration or {}
                partner_service_denominators.append(configuration.get("specialist_denominator", ""))

    except SQLAlchemyError as e:
        # Best effort: log and fall through, returning whatever was gathered.
        current_app.logger.error(f"Database error retrieving partner specialist denominators: {str(e)}")

    return partner_service_denominators
@staticmethod
def can_use_specialist_type(tenant_id: int, specialist_type: str) -> bool:
@staticmethod
def can_use_specialist_type(tenant_id: int, specialist_type: str) -> bool:
"""
Check if a tenant can use a specific specialist type
@@ -166,7 +166,7 @@ class TenantServices:
# If it's a partner-specific specialist, check if tenant has access
partner_name = specialist_def.get('partner')
available_partners = TenantServices.get_tenant_partner_names(tenant_id)
available_partners = TenantServices.get_tenant_partner_specialist_denominators(tenant_id)
return partner_name in available_partners

View File

@@ -332,24 +332,22 @@ class BaseConfigTypesCacheHandler(CacheHandler[Dict[str, Any]]):
"""
return isinstance(value, dict) # Cache all dictionaries
def _load_type_definitions(self) -> Dict[str, Dict[str, Any]]:
    """Load type definitions from the corresponding type_defs module.

    Returns:
        A new dictionary mapping each type id in ``self._types_module`` to a shallow copy of
        its definition, with every field of the definition carried over.

    Raises:
        ValueError: If ``self._types_module`` is unset or empty.
    """
    if not self._types_module:
        raise ValueError("_types_module must be set by subclass")

    # Copy all fields from each type definition (shallow copy per type, so the returned
    # per-type dicts are distinct objects from those held by the module).
    return {type_id: dict(info) for type_id, info in self._types_module.items()}
def get_types(self) -> Dict[str, Dict[str, str]]:
"""Get dictionary of available types with name and description"""
def get_types(self) -> Dict[str, Dict[str, Any]]:
"""Get dictionary of available types with all defined properties"""
result = self.get(
lambda type_name: self._load_type_definitions(),
type_name=f'{self.config_type}_types',