Chunk size defined at Tenant level.

This commit is contained in:
Josako
2024-07-08 15:20:02 +02:00
parent f082e7a242
commit cbfa6d67bf
3 changed files with 13 additions and 5 deletions

View File

@@ -35,6 +35,9 @@ class Tenant(db.Model):
html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li']) html_end_tags = db.Column(ARRAY(sa.String(10)), nullable=True, default=['p', 'li'])
html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True) html_included_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True) html_excluded_elements = db.Column(ARRAY(sa.String(50)), nullable=True)
min_chunk_size = db.Column(db.Integer, nullable=True, default=2000)
max_chunk_size = db.Column(db.Integer, nullable=True, default=3000)
# Embedding search variables # Embedding search variables
es_k = db.Column(db.Integer, nullable=True, default=5) es_k = db.Column(db.Integer, nullable=True, default=5)
@@ -77,6 +80,9 @@ class Tenant(db.Model):
'html_end_tags': self.html_end_tags, 'html_end_tags': self.html_end_tags,
'html_included_elements': self.html_included_elements, 'html_included_elements': self.html_included_elements,
'html_excluded_elements': self.html_excluded_elements, 'html_excluded_elements': self.html_excluded_elements,
'min_chunk_size': self.min_chunk_size,
'max_chunk_size': self.max_chunk_size,
# Embedding search variables
'es_k': self.es_k, 'es_k': self.es_k,
'es_similarity_threshold': self.es_similarity_threshold, 'es_similarity_threshold': self.es_similarity_threshold,
'chat_RAG_temperature': self.chat_RAG_temperature, 'chat_RAG_temperature': self.chat_RAG_temperature,

View File

@@ -31,8 +31,8 @@
</a> </a>
</li> </li>
<li class="nav-item"> <li class="nav-item">
<a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#html-chunking-tab" role="tab" aria-controls="html-chunking" aria-selected="false"> <a class="nav-link mb-0 px-0 py-1" data-toggle="tab" href="#chunking-tab" role="tab" aria-controls="chunking" aria-selected="false">
HTML Chunking Chunking
</a> </a>
</li> </li>
<li class="nav-item"> <li class="nav-item">
@@ -71,9 +71,9 @@
<p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p> <p id="copy-message" style="display:none;color:green;">API key copied to clipboard</p>
</div> </div>
</div> </div>
<!-- HTML Chunking Settings Tab --> <!-- Chunking Settings Tab -->
<div class="tab-pane fade" id="html-chunking-tab" role="tabpanel"> <div class="tab-pane fade" id="chunking-tab" role="tabpanel">
{% set html_fields = ['html_tags', 'html_end_tags', 'html_included_elements', 'html_excluded_elements', ] %} {% set html_fields = ['html_tags', 'html_end_tags', 'html_included_elements', 'html_excluded_elements', 'min_chunk_size', 'max_chunk_size'] %}
{% for field in form %} {% for field in form %}
{{ render_included_field(field, disabled_fields=html_fields, include_fields=html_fields) }} {{ render_included_field(field, disabled_fields=html_fields, include_fields=html_fields) }}
{% endfor %} {% endfor %}

View File

@@ -32,6 +32,8 @@ class TenantForm(FlaskForm):
default='p, li') default='p, li')
html_included_elements = StringField('HTML Included Elements', validators=[Optional()]) html_included_elements = StringField('HTML Included Elements', validators=[Optional()])
html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()]) html_excluded_elements = StringField('HTML Excluded Elements', validators=[Optional()])
min_chunk_size = IntegerField('Minimum Chunk Size (2000)', validators=[NumberRange(min=0), Optional()], default=2000)
max_chunk_size = IntegerField('Maximum Chunk Size (3000)', validators=[NumberRange(min=0), Optional()], default=3000)
# Embedding Search variables # Embedding Search variables
es_k = IntegerField('Limit for Searching Embeddings (5)', es_k = IntegerField('Limit for Searching Embeddings (5)',
default=5, default=5,